diff --git a/autogtp/Management.cpp b/autogtp/Management.cpp index 069b0bfce..d6f55ec3b 100644 --- a/autogtp/Management.cpp +++ b/autogtp/Management.cpp @@ -71,8 +71,16 @@ void Management::runTuningProcess(const QString &tuneCmdLine) { tuneProcess.waitForStarted(-1); while (tuneProcess.state() == QProcess::Running) { tuneProcess.waitForReadyRead(1000); + QByteArray text = tuneProcess.readAllStandardOutput(); + int version_start = text.indexOf("Leela Zero ") + 11; + if (version_start > 10) { + int version_end = text.indexOf(" ", version_start); + m_leelaversion = QString(text.mid(version_start, version_end - version_start)); + } + QTextStream(stdout) << text; QTextStream(stdout) << tuneProcess.readAllStandardError(); } + QTextStream(stdout) << "Found Leela Version : " << m_leelaversion << endl; tuneProcess.waitForFinished(-1); } @@ -316,6 +324,8 @@ Order Management::getWorkInternal(bool tuning) { prog_cmdline.append("0"); } else { prog_cmdline.append(QString::number(AUTOGTP_VERSION)); + if (!m_leelaversion.isEmpty()) + prog_cmdline.append("/"+m_leelaversion); } QProcess curl; curl.start(prog_cmdline); diff --git a/autogtp/Management.h b/autogtp/Management.h index cb6715c13..4cdba6a6b 100644 --- a/autogtp/Management.h +++ b/autogtp/Management.h @@ -82,6 +82,7 @@ public slots: int m_threadsLeft; bool m_delNetworks; QLockFile *m_lockFile; + QString m_leelaversion; Order getWorkInternal(bool tuning); Order getWork(bool tuning = false); diff --git a/msvc/VS2015/leela-zero.vcxproj b/msvc/VS2015/leela-zero.vcxproj index dc31e6f6e..885d1efab 100644 --- a/msvc/VS2015/leela-zero.vcxproj +++ b/msvc/VS2015/leela-zero.vcxproj @@ -1,4 +1,4 @@ - + @@ -93,6 +93,7 @@ + @@ -121,6 +122,7 @@ + diff --git a/src/GTP.cpp b/src/GTP.cpp index efb2a402f..e2f6630b4 100644 --- a/src/GTP.cpp +++ b/src/GTP.cpp @@ -65,6 +65,10 @@ std::vector cfg_gpus; bool cfg_sgemm_exhaustive; bool cfg_tune_only; #endif +int cfg_puct_factor; +float cfg_backup_pct; +int cfg_backup_type; +bool cfg_pseudo_backup; float cfg_puct; float cfg_softmax_temp; float cfg_fpu_reduction; @@ -95,6 +99,10 @@ void GTP::setup_default_parameters() { cfg_sgemm_exhaustive = false; cfg_tune_only = false; #endif + cfg_puct_factor = 1; + cfg_backup_pct = 50.0; + cfg_backup_type = 3; + cfg_pseudo_backup = true; cfg_puct = 0.8f; cfg_softmax_temp = 1.0f; cfg_fpu_reduction = 0.25f; diff --git a/src/GTP.h b/src/GTP.h index 2e5c00d1e..7684da094 100644 --- a/src/GTP.h +++ b/src/GTP.h @@ -48,6 +48,8 @@ extern std::vector cfg_gpus; extern bool cfg_sgemm_exhaustive; extern bool cfg_tune_only; #endif +extern int cfg_puct_factor; +extern float cfg_backup_pct; extern float cfg_puct; extern float cfg_softmax_temp; extern float cfg_fpu_reduction; diff --git a/src/Leela.cpp b/src/Leela.cpp index 9a17b160e..38d3850a6 100644 --- a/src/Leela.cpp +++ b/src/Leela.cpp @@ -77,6 +77,22 @@ static void parse_commandline(int argc, char *argv[]) { ("noponder", "Disable thinking on opponent's time.") ("benchmark", "Test network and exit. Default args:\n-v3200 --noponder " "-m0 -t1 -s1.") + + ("puct-factor", po::value()->default_value(cfg_puct_factor), + "0: original (=1), 1: linear (=winrate*2, default), 2: quadratic (=winrate(1-winrate)*4).") + ("backup-pct", po::value()->default_value(cfg_backup_pct), + "Update (backup) Q-values (winrates) of white's moves that are ancestors of the new leaf node " + "with a probability determined by winrate at root node and this parameter.\n" + "At most 100, defaulted to 50.\n" + "The lower the value, the weaker you assume white to be.") + ("backup-type", po::value()->default_value(cfg_backup_type), + "0: throw a dice to go up a generation,\n" + "1: always update, 2: never update,\n" + "3: throw dice once for each simulation (default),\n" + "4: throw dice once for each ancestor,\n" + "5: update the foremost ancestors only.") + ("pseudo-backup", po::value()->default_value("on"), + "[on|off] Whether to increment visit count when value is not actually updated.") ; #ifdef USE_OPENCL po::options_description gpu_desc("GPU options"); @@ -102,13 +118,13 @@ static void parse_commandline(int argc, char *argv[]) { po::value()->default_value(cfg_random_temp), "Temperature to use for random move selection.") ; -#ifdef USE_TUNER po::options_description tuner_desc("Tuning options"); tuner_desc.add_options() ("puct", po::value()) ("softmax_temp", po::value()) ("fpu_reduction", po::value()) ; +#ifdef USE_TUNER #endif // These won't be shown, we use them to catch incorrect usage of the // command line. @@ -167,7 +183,26 @@ static void parse_commandline(int argc, char *argv[]) { cfg_quiet = true; // Set this early to avoid unnecessary output. } -#ifdef USE_TUNER + if (vm.count("puct-factor")) { + cfg_puct_factor = vm["puct-factor"].as(); + } + if (vm.count("backup-pct")) { + cfg_backup_pct = vm["backup-pct"].as(); + if (cfg_backup_pct > 100.0) { + cfg_backup_pct = 50.0; + myprintf("Invalid backup percentage. Falling back to 50.0.\n"); + } + } + if (vm.count("backup-type")) { + cfg_backup_type = vm["backup-type"].as(); + } + if (vm.count("pseudo-backup")) { + auto pb = vm["pseudo-backup"].as(); + if (pb == "off") { + cfg_pseudo_backup = false; + } + } + if (vm.count("puct")) { cfg_puct = vm["puct"].as(); } @@ -177,6 +212,7 @@ static void parse_commandline(int argc, char *argv[]) { if (vm.count("fpu_reduction")) { cfg_fpu_reduction = vm["fpu_reduction"].as(); } +#ifdef USE_TUNER #endif if (vm.count("logfile")) { diff --git a/src/Network.cpp b/src/Network.cpp index d629afe06..6d4ef455e 100644 --- a/src/Network.cpp +++ b/src/Network.cpp @@ -970,8 +970,8 @@ Network::Netresult Network::get_scored_moves_internal( const auto winrate_out = innerproduct<256, 1, false>(winrate_data, ip2_val_w, ip2_val_b); - // Sigmoid - const auto winrate_sig = (1.0f + std::tanh(winrate_out[0])) / 2.0f; + // Sigmoid: tanh normalized to take value in (0,1) + const auto winrate_sig = 1.0f / (1.0f + std::exp(-2.0f * winrate_out[0])); Netresult result; diff --git a/src/SMP.cpp b/src/SMP.cpp index d46756c18..7e387c0ad 100644 --- a/src/SMP.cpp +++ b/src/SMP.cpp @@ -32,7 +32,12 @@ SMP::Lock::Lock(Mutex & m) { void SMP::Lock::lock() { assert(!m_owns_lock); - while (m_mutex->m_lock.exchange(true, std::memory_order_acquire) == true); + // Test and Test-and-Set reduces memory contention + // However, just trying to Test-and-Set first improves performance in almost + // all cases + while (m_mutex->m_lock.exchange(true, std::memory_order_acquire)) { + while (m_mutex->m_lock.load(std::memory_order_relaxed)); + } m_owns_lock = true; } diff --git a/src/UCTNode.cpp b/src/UCTNode.cpp index 6d35b95f6..80023cab5 100644 --- a/src/UCTNode.cpp +++ b/src/UCTNode.cpp @@ -87,6 +87,7 @@ bool UCTNode::create_children(std::atomic& nodecount, if (state.board.white_to_move()) { m_net_eval = 1.0f - m_net_eval; } + update(m_net_eval); eval = m_net_eval; std::vector nodelist; @@ -255,11 +256,13 @@ UCTNode* UCTNode::uct_select_child(int color, bool is_root) { // Count parentvisits manually to avoid issues with transpositions. auto total_visited_policy = 0.0f; auto parentvisits = size_t{0}; + auto parent_total_blackeval = get_pure_eval(FastBoard::BLACK); for (const auto& child : m_children) { if (child.valid()) { parentvisits += child.get_visits(); if (child.get_visits() > 0) { total_visited_policy += child.get_score(); + parent_total_blackeval += child.get_blackevals(); } } } @@ -270,9 +273,18 @@ UCTNode* UCTNode::uct_select_child(int color, bool is_root) { // Do not do this if we have introduced noise at this node exactly // to explore more. - auto pure_eval = get_pure_eval(color); + parentvisits++; + auto pure_eval = parent_total_blackeval / float(parentvisits); + if (color == FastBoard::WHITE) { + pure_eval = 1.0 - pure_eval; + } if (!is_root || !cfg_noise) { - fpu_reduction = cfg_fpu_reduction * std::sqrt(total_visited_policy) * pure_eval / 0.5; + fpu_reduction = cfg_fpu_reduction * std::sqrt(total_visited_policy); + if (cfg_puct_factor == 2) { + fpu_reduction *= pure_eval * (1 - pure_eval) / 0.25; + } else if (cfg_puct_factor == 1) { + fpu_reduction *= pure_eval / 0.5; + } } // Estimated eval for unknown nodes = current parent winrate - reduction auto fpu_eval = pure_eval - fpu_reduction; @@ -291,7 +303,12 @@ UCTNode* UCTNode::uct_select_child(int color, bool is_root) { } auto psa = child.get_score(); auto denom = 1.0 + child.get_visits(); - auto puct = cfg_puct * psa * (numerator / denom) * pure_eval / 0.5; + auto puct = cfg_puct * psa * (numerator / denom); + if (cfg_puct_factor == 2) { + puct *= pure_eval * (1 - pure_eval) / 0.25; + } else if (cfg_puct_factor == 1) { + puct *= pure_eval / 0.5; + } auto value = winrate + puct; assert(value > std::numeric_limits::lowest()); diff --git a/src/UCTNodePointer.cpp b/src/UCTNodePointer.cpp index bb51f83b0..b026d95fa 100644 --- a/src/UCTNodePointer.cpp +++ b/src/UCTNodePointer.cpp @@ -90,6 +90,11 @@ float UCTNodePointer::get_eval(int tomove) const { return read_ptr()->get_eval(tomove); } +double UCTNodePointer::get_blackevals() const { + assert(is_inflated()); + return read_ptr()->get_blackevals(); +} + int UCTNodePointer::get_move() const { if (is_inflated()) return read_ptr()->get_move(); return read_vertex(); diff --git a/src/UCTNodePointer.h b/src/UCTNodePointer.h index d40bbc2ff..19d7dcfca 100644 --- a/src/UCTNodePointer.h +++ b/src/UCTNodePointer.h @@ -110,6 +110,7 @@ class UCTNodePointer { int get_move() const; // this can only be called if it is an inflated pointer float get_eval(int tomove) const; + double get_blackevals() const; }; #endif diff --git a/src/UCTSearch.cpp b/src/UCTSearch.cpp index 63dfe254b..a23e77c12 100644 --- a/src/UCTSearch.cpp +++ b/src/UCTSearch.cpp @@ -24,6 +24,7 @@ #include #include #include +#include #include #include "FastBoard.h" @@ -31,6 +32,7 @@ #include "FullBoard.h" #include "GTP.h" #include "GameState.h" +#include "Random.h" #include "TimeControl.h" #include "Timing.h" #include "Training.h" @@ -157,8 +159,19 @@ float UCTSearch::get_min_psa_ratio() const { return 0.0f; } +float calc_backup_pct (float blackeval) { + // dynamically adjust backup_pct according to root winrate + if (blackeval > 0.5) { + return (100.0 - cfg_backup_pct) * 4.0 * blackeval * (1 - blackeval) + cfg_backup_pct; + } else { + return 100.0; + } +} + SearchResult UCTSearch::play_simulation(GameState & currstate, - UCTNode* const node) { + UCTNode* const node, + float backup_pct, + int depth) { const auto color = currstate.get_to_move(); auto result = SearchResult{}; @@ -168,6 +181,7 @@ SearchResult UCTSearch::play_simulation(GameState & currstate, if (currstate.get_passes() >= 2) { auto score = currstate.final_score(); result = SearchResult::from_score(score); + node->update(result.eval()); } else if (m_nodes < MAX_TREE_SIZE) { float eval; const auto had_children = node->has_children(); @@ -179,6 +193,9 @@ SearchResult UCTSearch::play_simulation(GameState & currstate, } } } + if (result.valid()) { + result.remaining_backups = - depth * (1.0 - backup_pct / 100.0); + } if (node->has_children() && !result.valid()) { auto next = node->uct_select_child(color, node == m_root.get()); @@ -188,13 +205,48 @@ SearchResult UCTSearch::play_simulation(GameState & currstate, if (move != FastBoard::PASS && currstate.superko()) { next->invalidate(); } else { - result = play_simulation(currstate, next); + if (backup_pct > 100.0) { + backup_pct = calc_backup_pct(node->get_pure_eval(FastBoard::BLACK)); + } + result = play_simulation(currstate, next, backup_pct, depth + 1); + result.remaining_backups++; + if (result.valid()) { + if (color == FastBoard::BLACK || result.backup_type == 1 || node->get_visits() == 0) { + node->update(result.eval()); + } + else if (result.backup_type == 2) { + if (cfg_pseudo_backup) { + node->update(node->get_pure_eval(FastBoard::BLACK)); + } + } + else if (result.backup_type == 5) { + if (std::uniform_real_distribution{ 0.0, 1.0 }(Random::get_Rng()) <= result.remaining_backups) { + node->update(result.eval()); + } + else { + if (cfg_pseudo_backup) { + node->update(node->get_pure_eval(FastBoard::BLACK)); + } + } + } + else if (std::uniform_real_distribution{ 0.0, 100.0 }(Random::get_Rng()) <= backup_pct) { + node->update(result.eval()); + if (result.backup_type == 3) { + result.backup_type = 1; + } + } + else { + if (cfg_pseudo_backup) { + node->update(node->get_pure_eval(FastBoard::BLACK)); + } + if (result.backup_type == 0 || result.backup_type == 3) { + result.backup_type = 2; + } + } + } } } - if (result.valid()) { - node->update(result.eval()); - } node->virtual_loss_undo(); return result; @@ -586,7 +638,7 @@ bool UCTSearch::stop_thinking(int elapsed_centis, int time_for_move) const { void UCTWorker::operator()() { do { auto currstate = std::make_unique(m_rootstate); - auto result = m_search->play_simulation(*currstate, m_root); + auto result = m_search->play_simulation(*currstate, m_root, 200.0, 0); if (result.valid()) { m_search->increment_playouts(); } @@ -631,7 +683,7 @@ int UCTSearch::think(int color, passflag_t passflag) { do { auto currstate = std::make_unique(m_rootstate); - auto result = play_simulation(*currstate, m_root.get()); + auto result = play_simulation(*currstate, m_root.get(), 200.0, 0); if (result.valid()) { increment_playouts(); } @@ -707,7 +759,7 @@ void UCTSearch::ponder() { auto last_output = 0; do { auto currstate = std::make_unique(m_rootstate); - auto result = play_simulation(*currstate, m_root.get()); + auto result = play_simulation(*currstate, m_root.get(), 200.0, 0); if (result.valid()) { increment_playouts(); } diff --git a/src/UCTSearch.h b/src/UCTSearch.h index f2425039b..84d0721db 100644 --- a/src/UCTSearch.h +++ b/src/UCTSearch.h @@ -32,9 +32,13 @@ #include "GameState.h" #include "UCTNode.h" +extern int cfg_backup_type; +extern bool cfg_pseudo_backup; class SearchResult { public: + int backup_type; + float remaining_backups{0.0}; SearchResult() = default; bool valid() const { return m_valid; } float eval() const { return m_eval; } @@ -52,7 +56,7 @@ class SearchResult { } private: explicit SearchResult(float eval) - : m_valid(true), m_eval(eval) {} + : m_valid(true), m_eval(eval), backup_type(cfg_backup_type) {} bool m_valid{false}; float m_eval{0.0f}; }; @@ -98,7 +102,7 @@ class UCTSearch { void ponder(); bool is_running() const; void increment_playouts(); - SearchResult play_simulation(GameState& currstate, UCTNode* const node); + SearchResult play_simulation(GameState& currstate, UCTNode* const node, float backup_pct, int depth); private: float get_min_psa_ratio() const; diff --git a/training/tf/net_to_model.py b/training/tf/net_to_model.py index ebefd4303..80bd08a27 100755 --- a/training/tf/net_to_model.py +++ b/training/tf/net_to_model.py @@ -24,7 +24,7 @@ print("Blocks", blocks) tfprocess = TFProcess() -tfprocess.init(batch_size=1) +tfprocess.init(batch_size=1, gpus_num=1) if tfprocess.RESIDUAL_BLOCKS != blocks: raise ValueError("Number of blocks in tensorflow model doesn't match "\ "number of blocks in input network") diff --git a/training/tf/tfprocess.py b/training/tf/tfprocess.py index 0dba54c7a..7c3a1f47f 100644 --- a/training/tf/tfprocess.py +++ b/training/tf/tfprocess.py @@ -132,13 +132,13 @@ def __init__(self): self.swa_recalc_bn = True gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8) - config = tf.ConfigProto(gpu_options=gpu_options) + config = tf.ConfigProto(gpu_options=gpu_options, allow_soft_placement=True) self.session = tf.Session(config=config) self.training = tf.placeholder(tf.bool) self.global_step = tf.Variable(0, name='global_step', trainable=False) - def init(self, batch_size, macrobatch=1, logbase='leelalogs'): + def init(self, batch_size, macrobatch=1, gpus_num=None, logbase='leelalogs'): self.batch_size = batch_size self.macrobatch = macrobatch self.logbase = logbase @@ -159,13 +159,15 @@ def init(self, batch_size, macrobatch=1, logbase='leelalogs'): probs = tf.reshape(probs, (batch_size, 19*19 + 1)) winner = tf.reshape(winner, (batch_size, 1)) - self.init_net(planes, probs, winner) + if gpus_num is None: + gpus_num = self.gpus_num + self.init_net(planes, probs, winner, gpus_num) - def init_net(self, planes, probs, winner): + def init_net(self, planes, probs, winner, gpus_num): self.y_ = probs # (tf.float32, [None, 362]) - self.sx = tf.split(planes, self.gpus_num) - self.sy_ = tf.split(probs, self.gpus_num) - self.sz_ = tf.split(winner, self.gpus_num) + self.sx = tf.split(planes, gpus_num) + self.sy_ = tf.split(probs, gpus_num) + self.sz_ = tf.split(winner, gpus_num) self.batch_norm_count = 0 self.reuse_var = None @@ -182,7 +184,7 @@ def init_net(self, planes, probs, winner): tower_reg_term = [] tower_y_conv = [] with tf.variable_scope(tf.get_variable_scope()): - for i in range(self.gpus_num): + for i in range(gpus_num): with tf.device("/gpu:%d" % i): with tf.name_scope("tower_%d" % i): loss, policy_loss, mse_loss, reg_term, y_conv = self.tower_loss(