From 77582b9e90262d42b123f549bd480d7500da2b1d Mon Sep 17 00:00:00 2001 From: Gian-Carlo Pascutto Date: Mon, 26 Nov 2018 21:52:16 +0100 Subject: [PATCH 01/45] Make the README more clear. Simplify instructions, especially related to building and running when wanting to contribute. Based on pull request #1983. --- README.md | 64 +++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 41 insertions(+), 23 deletions(-) diff --git a/README.md b/README.md index 15bac5fc1..f2b567dc1 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,6 @@ [![Windows Build Status](https://ci.appveyor.com/api/projects/status/pf1hcgly8f1a8iu0/branch/next?svg=true)](https://ci.appveyor.com/project/gcp/leela-zero/branch/next) - # What A Go program with no human provided knowledge. Using MCTS (but without @@ -51,10 +50,10 @@ after each game. You can just close the autogtp window to stop it. ### macOS and Linux -Follow the instructions below to compile the leelaz binary, then go into -the autogtp subdirectory and follow [the instructions there](autogtp/README.md) -to build the autogtp binary. Copy the leelaz binary into the autogtp dir, and -launch autogtp. +Follow the instructions below to compile the leelaz and autogtp binaries in +the build subdirectory. Then run autogtp as explained in the +[contributing](#contributing) instructions below. +Contributing will start when you run autogtp. 
## Using a Cloud provider @@ -66,15 +65,18 @@ There are community maintained instructions available here: * [Running Leela Zero client on a Tesla V100 GPU for free (Microsoft Azure Cloud Free Trial)](https://docs.google.com/document/d/1DMpi16Aq9yXXvGj0OOw7jbd7k2A9LHDUDxxWPNHIRPQ/edit?usp=sharing) -# I just want to play right now +# I just want to play with Leela Zero right now -Download the best known network weights file from: https://zero.sjeng.org/best-network +Download the best known network weights file from [here](https://zero.sjeng.org/best-network), or, if you prefer a more human style, +a (weaker) network trained from human games [here](https://sjeng.org/zero/best_v1.txt.zip). -And head to the [Usage](#usage) section of this README. +If you are on Windows, download an official release from [here](https://github.com/gcp/leela-zero/releases) and head to the [Usage](#usage-for-playing-or-analyzing-games) +section of this README. -If you prefer a more human style, a network trained from human games is available here: https://sjeng.org/zero/best_v1.txt.zip. +If you are on Unix or macOS, you have to compile the program yourself. Follow +the compilation instructions below and then read the [Usage](#usage-for-playing-or-analyzing-games) section. -# Compiling +# Compiling AutoGTP and/or Leela Zero ## Requirements @@ -91,7 +93,7 @@ by adding -DUSE_CPU_ONLY=1 to the cmake command line. * Optional: BLAS Library: OpenBLAS (libopenblas-dev) or Intel MKL * The program has been tested on Windows, Linux and macOS. -## Example of compiling and running - Ubuntu & similar +## Example of compiling - Ubuntu & similar # Test for OpenCL support & compatibility sudo apt install clinfo && clinfo @@ -104,15 +106,17 @@ by adding -DUSE_CPU_ONLY=1 to the cmake command line. 
# Install build depedencies sudo apt install libboost-dev libboost-program-options-dev libboost-filesystem-dev opencl-headers ocl-icd-libopencl1 ocl-icd-opencl-dev zlib1g-dev - # Use stand alone directory to keep source dir clean + # Use a stand alone build directory to keep source dir clean mkdir build && cd build + + # Compile leelaz and autogtp in build subdirectory with cmake cmake .. cmake --build . + + # Optional: test if your build works correctly ./tests - curl -O https://zero.sjeng.org/best-network - ./leelaz --weights best-network -## Example of compiling and running - macOS +## Example of compiling - macOS # Clone github repo git clone https://github.com/gcp/leela-zero @@ -122,34 +126,48 @@ by adding -DUSE_CPU_ONLY=1 to the cmake command line. # Install build depedencies brew install boost cmake - # Use stand alone directory to keep source dir clean + # Use a stand alone build directory to keep source dir clean mkdir build && cd build + + # Compile leelaz and autogtp in build subdirectory with cmake cmake .. cmake --build . + + # Optional: test if your build works correctly ./tests - curl -O https://zero.sjeng.org/best-network - ./leelaz --weights best-network -## Example of compiling and running - Windows +## Example of compiling - Windows # Clone github repo git clone https://github.com/gcp/leela-zero cd leela-zero git submodule update --init --recursive + cd msvc Double-click the leela-zero2015.sln or leela-zero2017.sln corresponding to the Visual Studio version you have. # Build from Visual Studio 2015 or 2017 - # Download to msvc\x64\Release - msvc\x64\Release\leelaz.exe --weights best-network -# Usage +# Contributing -The engine supports the [GTP protocol, version 2](https://www.lysator.liu.se/~gunnar/gtp/gtp2-spec-draft2/gtp2-spec.html). +For Windows, you can use a release package, see ["I want to help"](#windows). 
+ +Unix and macOS, after finishing the compile and while in the build directory: + + # Copy leelaz binary to autogtp subdirectory + cp leelaz autogtp + + # Run AutoGTP to start contributing + ./autogtp/autogtp + + +# Usage for playing or analyzing games Leela Zero is not meant to be used directly. You need a graphical interface for it, which will interface with Leela Zero through the GTP protocol. +The engine supports the [GTP protocol, version 2](https://www.lysator.liu.se/~gunnar/gtp/gtp2-spec-draft2/gtp2-spec.html). + [Lizzie](https://github.com/featurecat/lizzie/releases) is a client specifically for Leela Zero which shows live search probilities, a win rate graph, and has an automatic game analysis mode. Has binaries for Windows, Mac, and Linux. From 8daa0cdb174a81e6b2c91da71cb8d53a77ffc6b7 Mon Sep 17 00:00:00 2001 From: Hersmunch Date: Thu, 29 Nov 2018 14:40:48 +0000 Subject: [PATCH 02/45] Refactor to allow AutoGTP to use Engine. * Move Engine to Game.h and refactor autogtp to use it too. * Fix initialization of job engines. Pull request #2029. 
--- autogtp/Game.cpp | 20 +++++-------------- autogtp/Game.h | 42 +++++++++++++++++++++++++++++---------- autogtp/Job.cpp | 36 ++++++++++++++++++--------------- autogtp/Job.h | 8 ++++---- validation/Validation.cpp | 10 ++++------ validation/Validation.h | 15 -------------- 6 files changed, 65 insertions(+), 66 deletions(-) diff --git a/autogtp/Game.cpp b/autogtp/Game.cpp index 530e83ce2..a873e2020 100644 --- a/autogtp/Game.cpp +++ b/autogtp/Game.cpp @@ -23,25 +23,15 @@ #include #include "Game.h" -Game::Game(const QString& weights, const QString& opt, const QString& binary, - const QStringList& commands) : +Game::Game(const Engine& engine) : QProcess(), - m_cmdLine(""), - m_binary(binary), - m_commands(commands), + m_engine(engine), m_resignation(false), m_blackToMove(true), m_blackResigned(false), m_passes(0), m_moveNum(0) { -#ifdef WIN32 - m_binary.append(".exe"); -#endif - if (!QFileInfo::exists(m_binary)) { - m_binary.remove(0, 2); // ./leelaz -> leelaz - } - m_cmdLine = m_binary + " " + opt + " " + weights; m_fileName = QUuid::createUuid().toRfc4122().toHex(); } @@ -171,7 +161,7 @@ void Game::checkVersion(const VersionTuple &min_version) { } bool Game::gameStart(const VersionTuple &min_version) { - start(m_cmdLine); + start(m_engine.getCmdLine()); if (!waitForStarted()) { error(Game::NO_LEELAZ); return false; @@ -180,7 +170,7 @@ bool Game::gameStart(const VersionTuple &min_version) { // check any return values. checkVersion(min_version); QTextStream(stdout) << "Engine has started." 
<< endl; - for (auto command : m_commands) { + for (auto command : m_engine.m_commands) { QTextStream(stdout) << command << endl; if (!sendGtpCommand(command)) { @@ -354,7 +344,7 @@ bool Game::loadSgf(const QString &fileName) { return sendGtpCommand(qPrintable("loadsgf " + fileName + ".sgf")); } -bool Game::fixSgf(QString& weightFile, bool resignation) { +bool Game::fixSgf(const QString& weightFile, const bool resignation) { QFile sgfFile(m_fileName + ".sgf"); if (!sgfFile.open(QIODevice::Text | QIODevice::ReadOnly)) { return false; diff --git a/autogtp/Game.h b/autogtp/Game.h index bbd404926..2c8cef801 100644 --- a/autogtp/Game.h +++ b/autogtp/Game.h @@ -19,17 +19,43 @@ #ifndef GAME_H #define GAME_H +#include #include #include using VersionTuple = std::tuple; +class Engine { +public: + Engine(const QString& network, + const QString& options, + const QStringList& commands = QStringList("time_settings 0 1 0"), + const QString& binary = QString("./leelaz")) : + m_binary(binary), m_options(options), + m_network(network), m_commands(commands) { +#ifdef WIN32 + m_binary.append(".exe"); +#endif + if (!QFileInfo::exists(m_binary)) { + m_binary.remove(0, 2); // ./leelaz -> leelaz + } + } + Engine() = default; + QString getCmdLine(void) const { + return m_binary + " " + m_options + " " + m_network; + } + QString getNetworkFile(void) const { + return QFileInfo(m_network).baseName(); + } + QString m_binary; + QString m_options; + QString m_network; + QStringList m_commands; +}; + class Game : QProcess { public: - Game(const QString& weights, - const QString& opt, - const QString& binary = QString("./leelaz"), - const QStringList& commands = QStringList("time_settings 0 1 0")); + Game(const Engine& engine); ~Game() = default; bool gameStart(const VersionTuple& min_version); void move(); @@ -41,16 +67,14 @@ class Game : QProcess { bool writeSgf(); bool loadTraining(const QString &fileName); bool saveTraining(); - bool fixSgf(QString& weightFile, bool resignation); + bool 
fixSgf(const QString& weightFile, const bool resignation); bool dumpTraining(); - QString getCmdLine() const { return m_cmdLine; } bool dumpDebug(); void gameQuit(); QString getMove() const { return m_moveDone; } QString getFile() const { return m_fileName; } bool setMove(const QString& m); bool checkGameEnd(); - void setCmdLine(const QString& cmd) { m_cmdLine = cmd; } int getWinner(); QString getWinnerName() const { return m_winner; } int getMovesCount() const { return m_moveNum; } @@ -68,9 +92,7 @@ class Game : QProcess { WRONG_GTP, LAUNCH_FAILURE }; - QString m_cmdLine; - QString m_binary; - QStringList m_commands; + Engine m_engine; QString m_winner; QString m_fileName; QString m_moveDone; diff --git a/autogtp/Job.cpp b/autogtp/Job.cpp index a33ce518b..e1a6669b3 100644 --- a/autogtp/Job.cpp +++ b/autogtp/Job.cpp @@ -27,14 +27,12 @@ Job::Job(QString gpu, Management *parent) : m_state(RUNNING), - m_option(""), - m_gpu(gpu), - m_boss(parent) + m_gpu(gpu), + m_boss(parent) { } void Job::init(const Order &o) { - m_option = " " + o.parameters()["options"] + m_gpu + " -g -q -w "; QStringList version_list = o.parameters()["leelazVer"].split("."); if (version_list.size() < 2) { QTextStream(stdout) @@ -47,27 +45,29 @@ void Job::init(const Order &o) { std::get<0>(m_leelazMinVersion) = version_list[0].toInt(); std::get<1>(m_leelazMinVersion) = version_list[1].toInt(); std::get<2>(m_leelazMinVersion) = version_list[2].toInt(); - } ProductionJob::ProductionJob(QString gpu, Management *parent) : -Job(gpu, parent) + Job(gpu, parent), + m_engine(Engine(QString(), QString())) { } ValidationJob::ValidationJob(QString gpu, Management *parent) : -Job(gpu, parent) + Job(gpu, parent), + m_engineFirst(Engine(QString(), QString())), + m_engineSecond(Engine(QString(), QString())) { } WaitJob::WaitJob(QString gpu, Management *parent) : -Job(gpu, parent) + Job(gpu, parent) { } Result ProductionJob::execute(){ Result res(Result::Error); - Game game("networks/" + m_network + ".gz", 
m_option); + Game game(m_engine); if (!game.gameStart(m_leelazMinVersion)) { return res; } @@ -91,7 +91,7 @@ Result ProductionJob::execute(){ QTextStream(stdout) << "Game has ended." << endl; if (game.getScore()) { game.writeSgf(); - game.fixSgf(m_network, false); + game.fixSgf(m_engine.getNetworkFile(), false); game.dumpTraining(); if (m_debug) { game.dumpDebug(); @@ -118,7 +118,8 @@ Result ProductionJob::execute(){ void ProductionJob::init(const Order &o) { Job::init(o); - m_network = o.parameters()["network"]; + m_engine.m_network = "networks/" + o.parameters()["network"] + ".gz"; + m_engine.m_options = " " + o.parameters()["options"] + m_gpu + " -g -q -w "; m_debug = o.parameters()["debug"] == "true"; if (o.type() == Order::RestoreSelfPlayed) { m_sgf = o.parameters()["sgf"]; @@ -131,7 +132,7 @@ void ProductionJob::init(const Order &o) { Result ValidationJob::execute(){ Result res(Result::Error); - Game first("networks/" + m_firstNet + ".gz", m_option); + Game first(m_engineFirst); if (!first.gameStart(m_leelazMinVersion)) { return res; } @@ -140,7 +141,7 @@ Result ValidationJob::execute(){ first.setMovesCount(m_moves); QFile::remove(m_sgfFirst + ".sgf"); } - Game second("networks/" + m_secondNet + ".gz", m_option); + Game second(m_engineSecond); if (!second.gameStart(m_leelazMinVersion)) { return res; } @@ -181,7 +182,8 @@ Result ValidationJob::execute(){ res.add("score", first.getResult()); res.add("winner", first.getWinnerName()); first.writeSgf(); - first.fixSgf(m_secondNet, (res.parameters()["score"] == "B+Resign")); + first.fixSgf(m_engineSecond.getNetworkFile(), + (res.parameters()["score"] == "B+Resign")); res.add("file", first.getFile()); } // Game is finished, send the result @@ -205,8 +207,10 @@ Result ValidationJob::execute(){ void ValidationJob::init(const Order &o) { Job::init(o); - m_firstNet = o.parameters()["firstNet"]; - m_secondNet = o.parameters()["secondNet"]; + m_engineFirst.m_network = "networks/" + o.parameters()["firstNet"] + ".gz"; + 
m_engineFirst.m_options = " " + o.parameters()["options"] + m_gpu + " -g -q -w "; + m_engineSecond.m_network = "networks/" + o.parameters()["secondNet"] + ".gz"; + m_engineSecond.m_options = " " + o.parameters()["options"] + m_gpu + " -g -q -w "; if (o.type() == Order::RestoreMatch) { m_sgfFirst = o.parameters()["sgfFirst"]; m_sgfSecond = o.parameters()["sgfSecond"]; diff --git a/autogtp/Job.h b/autogtp/Job.h index 8805265e6..b386b5d09 100644 --- a/autogtp/Job.h +++ b/autogtp/Job.h @@ -19,6 +19,7 @@ #ifndef JOB_H #define JOB_H +#include "Game.h" #include "Result.h" #include "Order.h" #include @@ -50,7 +51,6 @@ class Job : public QObject { protected: QAtomicInt m_state; - QString m_option; QString m_gpu; int m_moves; VersionTuple m_leelazMinVersion; @@ -66,7 +66,7 @@ class ProductionJob : public Job { void init(const Order &o); Result execute(); private: - QString m_network; + Engine m_engine; QString m_sgf; bool m_debug; }; @@ -79,8 +79,8 @@ class ValidationJob : public Job { void init(const Order &o); Result execute(); private: - QString m_firstNet; - QString m_secondNet; + Engine m_engineFirst; + Engine m_engineSecond; QString m_sgfFirst; QString m_sgfSecond; }; diff --git a/validation/Validation.cpp b/validation/Validation.cpp index 9ed1b1a18..757f59869 100644 --- a/validation/Validation.cpp +++ b/validation/Validation.cpp @@ -28,22 +28,20 @@ const VersionTuple min_leelaz_version{0, 16, 0}; void ValidationWorker::run() { do { - Game first(m_engines[0].m_network, m_engines[0].m_options, - m_engines[0].m_binary, m_engines[0].m_commands); + Game first(m_engines[0]); if (!first.gameStart(min_leelaz_version)) { emit resultReady(Sprt::NoResult, Game::BLACK); return; } - Game second(m_engines[1].m_network, m_engines[1].m_options, - m_engines[1].m_binary, m_engines[1].m_commands); + Game second(m_engines[1]); if (!second.gameStart(min_leelaz_version)) { emit resultReady(Sprt::NoResult, Game::BLACK); return; } QTextStream(stdout) << "starting:" << endl << - 
first.getCmdLine() << endl << + m_engines[0].getCmdLine() << endl << "vs" << endl << - second.getCmdLine() << endl; + m_engines[1].getCmdLine() << endl; QString wmove = "play white "; QString bmove = "play black "; diff --git a/validation/Validation.h b/validation/Validation.h index 6dbd81ebe..24d5ef1a8 100644 --- a/validation/Validation.h +++ b/validation/Validation.h @@ -28,21 +28,6 @@ #include "../autogtp/Game.h" #include "Results.h" -class Engine { -public: - Engine(const QString& network, - const QString& options, - const QStringList& commands = QStringList("time_settings 0 1 0"), - const QString& binary = QString("./leelaz")) : - m_binary(binary), m_options(options), - m_network(network), m_commands(commands) {} - Engine() = default; - QString m_binary; - QString m_options; - QString m_network; - QStringList m_commands; -}; - class ValidationWorker : public QThread { Q_OBJECT public: From 64097f0790f523e842336eafe08a3338132c51e0 Mon Sep 17 00:00:00 2001 From: ZenStone Date: Wed, 5 Dec 2018 04:47:26 +0900 Subject: [PATCH 03/45] Fix printf call style. Generally speaking, providing character pointers as the first argument directly might cause FSB (Format String Bug). Pull request #2063. --- src/GTP.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/GTP.cpp b/src/GTP.cpp index 80b472887..934a4a2c9 100644 --- a/src/GTP.cpp +++ b/src/GTP.cpp @@ -101,8 +101,7 @@ void GTP::initialize(std::unique_ptr&& net) { myprintf("for the default settings on your system.\n"); throw std::runtime_error("Error setting memory requirements."); } - myprintf(message.c_str()); - myprintf("\n"); + myprintf("%s\n", message.c_str()); } void GTP::setup_default_parameters() { From c157d0b7a0f3895193848b6342cedb8f2d93de94 Mon Sep 17 00:00:00 2001 From: Gian-Carlo Pascutto Date: Fri, 7 Dec 2018 08:17:47 -0500 Subject: [PATCH 04/45] Update Khronos OpenCL C++ headers. Update from upstream f0b7045. Fixes warnings related to CL_TARGET_OPENCL_VERSION. 
--- src/CL/cl2.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/CL/cl2.hpp b/src/CL/cl2.hpp index 711b429e9..d49c156b4 100644 --- a/src/CL/cl2.hpp +++ b/src/CL/cl2.hpp @@ -447,6 +447,8 @@ # undef CL_HPP_TARGET_OPENCL_VERSION # define CL_HPP_TARGET_OPENCL_VERSION 200 #endif +/* Forward target OpenCL version to C headers */ +#define CL_TARGET_OPENCL_VERSION CL_HPP_TARGET_OPENCL_VERSION #if !defined(CL_HPP_MINIMUM_OPENCL_VERSION) # define CL_HPP_MINIMUM_OPENCL_VERSION 200 From bc3e750186750aee6bbdbfeac65d977d1a5c11ed Mon Sep 17 00:00:00 2001 From: Seth Troisi Date: Tue, 20 Nov 2018 02:34:20 -0800 Subject: [PATCH 05/45] Cleanup loop code. Pull request #2033. --- src/Network.cpp | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/Network.cpp b/src/Network.cpp index 1ba84ce25..933784070 100644 --- a/src/Network.cpp +++ b/src/Network.cpp @@ -875,14 +875,12 @@ void Network::show_heatmap(const FastState* const state, std::stable_sort(rbegin(moves), rend(moves)); auto cum = 0.0f; - size_t tried = 0; - while (cum < 0.85f && tried < moves.size()) { - if (moves[tried].first < 0.01f) break; + for (const auto& move : moves) { + if (cum > 0.85f || move.first < 0.01f) break; myprintf("%1.3f (%s)\n", - moves[tried].first, - state->board.move_to_text(moves[tried].second).c_str()); - cum += moves[tried].first; - tried++; + move.first, + state->board.move_to_text(move.second).c_str()); + cum += move.first; } } } From d166740f5ec2f3304ee046df747692cb901f8edc Mon Sep 17 00:00:00 2001 From: Hersmunch Date: Wed, 19 Dec 2018 13:22:08 +0000 Subject: [PATCH 06/45] AutoGTP: allow specifying an SGF as initial position. * Make AutoGTP URL parametric. * Support for the sgfhash and movescount parameters in get-task. * Automatic downloading of sgf and training files. * Fix Management.cpp for older Qt5 versions. 
* Added starting match games from specified initial position * Tidy ValidationJob::init() like ProductionJob::init() * Use existing QUuid method of generating random file names instead of QTemporaryFile when fetching game data. Moreover, we do not load training data in LeelaZ since it is not needed to start from an arbitrary position. Pull request #2052. --- autogtp/Game.cpp | 5 +++ autogtp/Game.h | 1 + autogtp/Job.cpp | 68 +++++++++++++++------------------- autogtp/Job.h | 4 +- autogtp/Management.cpp | 84 ++++++++++++++++++++++++++++++++---------- autogtp/Management.h | 1 + autogtp/Worker.cpp | 4 +- 7 files changed, 104 insertions(+), 63 deletions(-) diff --git a/autogtp/Game.cpp b/autogtp/Game.cpp index a873e2020..228fc1d70 100644 --- a/autogtp/Game.cpp +++ b/autogtp/Game.cpp @@ -344,6 +344,11 @@ bool Game::loadSgf(const QString &fileName) { return sendGtpCommand(qPrintable("loadsgf " + fileName + ".sgf")); } +bool Game::loadSgf(const QString &fileName, const int moves) { + QTextStream(stdout) << "Loading " << fileName + ".sgf with " << moves << " moves" << endl; + return sendGtpCommand(qPrintable("loadsgf " + fileName + ".sgf " + QString::number(moves + 1))); +} + bool Game::fixSgf(const QString& weightFile, const bool resignation) { QFile sgfFile(m_fileName + ".sgf"); if (!sgfFile.open(QIODevice::Text | QIODevice::ReadOnly)) { diff --git a/autogtp/Game.h b/autogtp/Game.h index 2c8cef801..5173d4ff2 100644 --- a/autogtp/Game.h +++ b/autogtp/Game.h @@ -64,6 +64,7 @@ class Game : QProcess { bool nextMove(); bool getScore(); bool loadSgf(const QString &fileName); + bool loadSgf(const QString &fileName, const int moves); bool writeSgf(); bool loadTraining(const QString &fileName); bool saveTraining(); diff --git a/autogtp/Job.cpp b/autogtp/Job.cpp index e1a6669b3..30db9d03e 100644 --- a/autogtp/Job.cpp +++ b/autogtp/Job.cpp @@ -72,11 +72,17 @@ Result ProductionJob::execute(){ return res; } if (!m_sgf.isEmpty()) { - game.loadSgf(m_sgf); - game.loadTraining(m_sgf); + 
if (m_moves == 0) { + game.loadSgf(m_sgf); + } else { + game.loadSgf(m_sgf, m_moves); + } game.setMovesCount(m_moves); + if (m_restore) { + game.loadTraining(m_sgf); + QFile::remove(m_sgf + ".train"); + } QFile::remove(m_sgf + ".sgf"); - QFile::remove(m_sgf + ".train"); } do { game.move(); @@ -103,9 +109,9 @@ Result ProductionJob::execute(){ res.add("moves", QString::number(game.getMovesCount())); break; case STORING: - res.type(Result::StoreSelfPlayed); game.writeSgf(); game.saveTraining(); + res.type(Result::StoreSelfPlayed); res.add("sgf", game.getFile()); res.add("moves", QString::number(game.getMovesCount())); break; @@ -121,13 +127,9 @@ void ProductionJob::init(const Order &o) { m_engine.m_network = "networks/" + o.parameters()["network"] + ".gz"; m_engine.m_options = " " + o.parameters()["options"] + m_gpu + " -g -q -w "; m_debug = o.parameters()["debug"] == "true"; - if (o.type() == Order::RestoreSelfPlayed) { - m_sgf = o.parameters()["sgf"]; - m_moves = o.parameters()["moves"].toInt(); - } else { - m_sgf = ""; - m_moves = 0; - } + m_sgf = o.parameters()["sgf"]; + m_moves = o.parameters()["moves"].toInt(); + m_restore = o.type() == Order::RestoreSelfPlayed; } Result ValidationJob::execute(){ @@ -136,19 +138,21 @@ Result ValidationJob::execute(){ if (!first.gameStart(m_leelazMinVersion)) { return res; } - if (!m_sgfFirst.isEmpty()) { - first.loadSgf(m_sgfFirst); - first.setMovesCount(m_moves); - QFile::remove(m_sgfFirst + ".sgf"); - } Game second(m_engineSecond); if (!second.gameStart(m_leelazMinVersion)) { return res; } - if (!m_sgfSecond.isEmpty()) { - second.loadSgf(m_sgfSecond); + if (!m_sgf.isEmpty()) { + if (m_moves == 0) { + first.loadSgf(m_sgf); + second.loadSgf(m_sgf); + } else { + first.loadSgf(m_sgf, m_moves); + second.loadSgf(m_sgf, m_moves); + } + first.setMovesCount(m_moves); second.setMovesCount(m_moves); - QFile::remove(m_sgfSecond + ".sgf"); + QFile::remove(m_sgf + ".sgf"); } QString wmove = "play white "; @@ -176,25 +180,22 @@ Result 
ValidationJob::execute(){ switch (m_state.load()) { case RUNNING: - res.add("moves", QString::number(first.getMovesCount())); - QTextStream(stdout) << "Game has ended." << endl; + QTextStream(stdout) << "Game has ended." << endl; if (first.getScore()) { res.add("score", first.getResult()); res.add("winner", first.getWinnerName()); first.writeSgf(); first.fixSgf(m_engineSecond.getNetworkFile(), - (res.parameters()["score"] == "B+Resign")); + (res.parameters()["score"] == "B+Resign")); res.add("file", first.getFile()); } - // Game is finished, send the result res.type(Result::Win); + res.add("moves", QString::number(first.getMovesCount())); break; case STORING: - res.type(Result::StoreMatch); first.writeSgf(); - second.writeSgf(); - res.add("sgfFirst", first.getFile()); - res.add("sgfSecond", second.getFile()); + res.type(Result::StoreMatch); + res.add("sgf", first.getFile()); res.add("moves", QString::number(first.getMovesCount())); break; default: @@ -211,15 +212,8 @@ void ValidationJob::init(const Order &o) { m_engineFirst.m_options = " " + o.parameters()["options"] + m_gpu + " -g -q -w "; m_engineSecond.m_network = "networks/" + o.parameters()["secondNet"] + ".gz"; m_engineSecond.m_options = " " + o.parameters()["options"] + m_gpu + " -g -q -w "; - if (o.type() == Order::RestoreMatch) { - m_sgfFirst = o.parameters()["sgfFirst"]; - m_sgfSecond = o.parameters()["sgfSecond"]; - m_moves = o.parameters()["moves"].toInt(); - } else { - m_sgfFirst = ""; - m_sgfSecond = ""; - m_moves = 0; - } + m_sgf = o.parameters()["sgf"]; + m_moves = o.parameters()["moves"].toInt(); } Result WaitJob::execute(){ @@ -232,5 +226,3 @@ void WaitJob::init(const Order &o) { Job::init(o); m_minutes = o.parameters()["minutes"].toInt(); } - - diff --git a/autogtp/Job.h b/autogtp/Job.h index b386b5d09..726e625c1 100644 --- a/autogtp/Job.h +++ b/autogtp/Job.h @@ -69,6 +69,7 @@ class ProductionJob : public Job { Engine m_engine; QString m_sgf; bool m_debug; + bool m_restore; }; class ValidationJob 
: public Job { @@ -81,8 +82,7 @@ class ValidationJob : public Job { private: Engine m_engineFirst; Engine m_engineSecond; - QString m_sgfFirst; - QString m_sgfSecond; + QString m_sgf; }; class WaitJob : public Job { diff --git a/autogtp/Management.cpp b/autogtp/Management.cpp index cedeef12a..cb3e51172 100644 --- a/autogtp/Management.cpp +++ b/autogtp/Management.cpp @@ -34,6 +34,8 @@ constexpr int RETRY_DELAY_MIN_SEC = 30; constexpr int RETRY_DELAY_MAX_SEC = 60 * 60; // 1 hour constexpr int MAX_RETRIES = 3; // Stop retrying after 3 times + +const QString server_url = "https://zero.sjeng.org/"; const QString Leelaz_min_version = "0.12"; Management::Management(const int gpus, @@ -291,9 +293,11 @@ Order Management::getWorkInternal(bool tuning) { resignation_percent : "3", noise : "true", randomcnt : "30" - } - white_hash_gzip_hash: "23c29bf777e446b5c3fb0e6e7fa4d53f15b99cc0c25798b70b57877b55bf1638" - black_hash_gzip_hash: "ccfe6023456aaaa423c29bf777e4aab481245289aaaabb70b7b5380992377aa8" + }, + white_hash_gzip_hash: "23c29bf777e446b5c3fb0e6e7fa4d53f15b99cc0c25798b70b57877b55bf1638", + black_hash_gzip_hash: "ccfe6023456aaaa423c29bf777e4aab481245289aaaabb70b7b5380992377aa8", + hash_sgf_hash: "7dbccc5ad9eb38f0135ff7ec860f0e81157f47dfc0a8375cef6bf1119859e537", + moves_count: "92" } { @@ -309,8 +313,10 @@ Order Management::getWorkInternal(bool tuning) { resignation_percent : "3", noise : "true", randomcnt : "30" - } - hash_gzip_hash: "23c29bf777e446b5c3fb0e6e7fa4d53f15b99cc0c25798b70b57877b55bf1638" + }, + hash_gzip_hash: "23c29bf777e446b5c3fb0e6e7fa4d53f15b99cc0c25798b70b57877b55bf1638", + hash_sgf_hash: "7dbccc5ad9eb38f0135ff7ec860f0e81157f47dfc0a8375cef6bf1119859e537", + moves_count: "92" } { @@ -324,7 +330,7 @@ Order Management::getWorkInternal(bool tuning) { prog_cmdline.append(".exe"); #endif prog_cmdline.append(" -s -J"); - prog_cmdline.append(" https://zero.sjeng.org/get-task/"); + prog_cmdline.append(" "+server_url+"get-task/"); if (tuning) { 
prog_cmdline.append("0"); } else { @@ -382,8 +388,9 @@ Order Management::getWorkInternal(bool tuning) { //getting the random seed QString rndSeed = "0"; - if (ob.contains("random_seed")) - rndSeed = ob.value("random_seed").toString(); + if (ob.contains("random_seed")) { + rndSeed = ob.value("random_seed").toString(); + } parameters["rndSeed"] = rndSeed; if (rndSeed == "0") { rndSeed = ""; @@ -394,6 +401,11 @@ Order Management::getWorkInternal(bool tuning) { parameters["optHash"] = ob.value("options_hash").toString(); parameters["options"] = getOptionsString(ob.value("options").toObject(), rndSeed); } + if (ob.contains("hash_sgf_hash")) { + parameters["sgf"] = fetchGameData(ob.value("hash_sgf_hash").toString(), "sgf"); + parameters["moves"] = ob.contains("moves_count") ? + ob.value("moves_count").toString() : "0"; + } parameters["debug"] = !m_debugPath.isEmpty() ? "true" : "false"; @@ -404,19 +416,20 @@ Order Management::getWorkInternal(bool tuning) { QString net = ob.value("hash").toString(); QString gzipHash = ob.value("hash_gzip_hash").toString(); fetchNetwork(net, gzipHash); - o.type(Order::Production); parameters["network"] = net; + + o.type(Order::Production); o.parameters(parameters); if (m_delNetworks && m_fallBack.parameters()["network"] != net) { - QTextStream(stdout) << "Deleting network " << "networks/" + m_fallBack.parameters()["network"] + ".gz" << endl; + QTextStream(stdout) << "Deleting network " << "networks/" + + m_fallBack.parameters()["network"] + ".gz" << endl; QFile::remove("networks/" + m_fallBack.parameters()["network"] + ".gz"); } m_fallBack = o; QTextStream(stdout) << "net: " << net << "." 
<< endl; } if (ob.value("cmd").toString() == "match") { - o.type(Order::Validation); QString net1 = ob.value("black_hash").toString(); QString gzipHash1 = ob.value("black_hash_gzip_hash").toString(); QString net2 = ob.value("white_hash").toString(); @@ -425,16 +438,20 @@ Order Management::getWorkInternal(bool tuning) { fetchNetwork(net2, gzipHash2); parameters["firstNet"] = net1; parameters["secondNet"] = net2; + + o.type(Order::Validation); o.parameters(parameters); if (m_delNetworks) { if (m_lastMatch.parameters()["firstNet"] != net1 && m_lastMatch.parameters()["firstNet"] != net2) { - QTextStream(stdout) << "Deleting network " << "networks/" + m_lastMatch.parameters()["firstNet"] + ".gz" << endl; + QTextStream(stdout) << "Deleting network " << "networks/" + + m_lastMatch.parameters()["firstNet"] + ".gz" << endl; QFile::remove("networks/" + m_lastMatch.parameters()["firstNet"] + ".gz"); } if (m_lastMatch.parameters()["secondNet"] != net1 && m_lastMatch.parameters()["secondNet"] != net2) { - QTextStream(stdout) << "Deleting network " << "networks/" + m_lastMatch.parameters()["secondNet"] + ".gz" << endl; + QTextStream(stdout) << "Deleting network " << "networks/" + + m_lastMatch.parameters()["secondNet"] + ".gz" << endl; QFile::remove("networks/" + m_lastMatch.parameters()["secondNet"] + ".gz"); } } @@ -443,8 +460,9 @@ Order Management::getWorkInternal(bool tuning) { QTextStream(stdout) << "second network " << net2 << "." << endl; } if (ob.value("cmd").toString() == "wait") { - o.type(Order::Wait); parameters["minutes"] = ob.value("minutes").toString(); + + o.type(Order::Wait); o.parameters(parameters); QTextStream(stdout) << "minutes: " << parameters["minutes"] << "." 
<< endl; } @@ -499,7 +517,8 @@ bool Management::networkExists(const QString &name, const QString &gzipHash) { if (result == gzipHash) { return true; } - QTextStream(stdout) << "Downloaded network hash doesn't match, calculated: " << result << " it should be: " << gzipHash << endl; + QTextStream(stdout) << "Downloaded network hash doesn't match, calculated: " + << result << " it should be: " << gzipHash << endl; } else { QTextStream(stdout) << "Unable to open network file for reading." << endl; @@ -533,7 +552,7 @@ void Management::fetchNetwork(const QString &net, const QString &hash) { // Use the filename from the server. prog_cmdline.append(" -s -J -o " + name + " "); prog_cmdline.append(" -w %{filename_effective}"); - prog_cmdline.append(" https://zero.sjeng.org/" + name); + prog_cmdline.append(" "+server_url + name); QProcess curl; curl.start(prog_cmdline); @@ -552,6 +571,31 @@ void Management::fetchNetwork(const QString &net, const QString &hash) { return; } +QString Management::fetchGameData(const QString &name, const QString &extension) { + QString prog_cmdline("curl"); +#ifdef WIN32 + prog_cmdline.append(".exe"); +#endif + + const auto fileName = QUuid::createUuid().toRfc4122().toHex(); + + // Be quiet, but output the real file name we saved. + // Use the filename from the server. + prog_cmdline.append(" -s -J -o " + fileName + "." + extension); + prog_cmdline.append(" -w %{filename_effective}"); + prog_cmdline.append(" "+server_url + "view/" + name + "." 
+ extension); + + QProcess curl; + curl.start(prog_cmdline); + curl.waitForFinished(-1); + + if (curl.exitCode()) { + throw NetworkException("Curl returned non-zero exit code " + + std::to_string(curl.exitCode())); + } + + return fileName; +} void Management::archiveFiles(const QString &fileName) { if (!m_keepPath.isEmpty()) { @@ -697,7 +741,7 @@ bool Management::sendCurl(const QStringList &lines) { -F options_hash=c2e3 -F random_seed=0 -F sgf=@file -http://zero.sjeng.org/submit-match +https://zero.sjeng.org/submit-match */ void Management::uploadResult(const QMap &r, const QMap &l) { @@ -720,7 +764,7 @@ void Management::uploadResult(const QMap &r, const QMap &r, const QMap &r, const QMap &l) { @@ -772,7 +816,7 @@ void Management::uploadData(const QMap &r, const QMap Date: Mon, 24 Dec 2018 09:48:39 +0000 Subject: [PATCH 07/45] Support separate options for white in match games. * Add optional separate options for white in match game. * Fixed loading of saved match order with optionsSecond. Pull request #2078. 
--- autogtp/Job.cpp | 2 +- autogtp/Management.cpp | 12 +++++++++++- autogtp/Order.cpp | 4 ++-- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/autogtp/Job.cpp b/autogtp/Job.cpp index 30db9d03e..cb65ebd57 100644 --- a/autogtp/Job.cpp +++ b/autogtp/Job.cpp @@ -211,7 +211,7 @@ void ValidationJob::init(const Order &o) { m_engineFirst.m_network = "networks/" + o.parameters()["firstNet"] + ".gz"; m_engineFirst.m_options = " " + o.parameters()["options"] + m_gpu + " -g -q -w "; m_engineSecond.m_network = "networks/" + o.parameters()["secondNet"] + ".gz"; - m_engineSecond.m_options = " " + o.parameters()["options"] + m_gpu + " -g -q -w "; + m_engineSecond.m_options = " " + o.parameters()["optionsSecond"] + m_gpu + " -g -q -w "; m_sgf = o.parameters()["sgf"]; m_moves = o.parameters()["moves"].toInt(); } diff --git a/autogtp/Management.cpp b/autogtp/Management.cpp index cb3e51172..6cd6ae91c 100644 --- a/autogtp/Management.cpp +++ b/autogtp/Management.cpp @@ -283,7 +283,7 @@ Order Management::getWorkInternal(bool tuning) { cmd : "match", white_hash : "223737476718d58a4a5b0f317a1eeeb4b38f0c06af5ab65cb9d76d68d9abadb6", black_hash : "92c658d7325fe38f0c8adbbb1444ed17afd891b9f208003c272547a7bcb87909", - options_hash : "c2e3" + options_hash : "c2e3", minimum_autogtp_version: "16", random_seed: "2301343010299460478", minimum_leelaz_version: "0.15", @@ -294,6 +294,13 @@ Order Management::getWorkInternal(bool tuning) { noise : "true", randomcnt : "30" }, + white_options : { + playouts : "0", + visits: "1601", + resignation_percent : "5", + noise : "false", + randomcnt : "0" + }, white_hash_gzip_hash: "23c29bf777e446b5c3fb0e6e7fa4d53f15b99cc0c25798b70b57877b55bf1638", black_hash_gzip_hash: "ccfe6023456aaaa423c29bf777e4aab481245289aaaabb70b7b5380992377aa8", hash_sgf_hash: "7dbccc5ad9eb38f0135ff7ec860f0e81157f47dfc0a8375cef6bf1119859e537", @@ -438,6 +445,9 @@ Order Management::getWorkInternal(bool tuning) { fetchNetwork(net2, gzipHash2); parameters["firstNet"] = net1; 
parameters["secondNet"] = net2; + parameters["optionsSecond"] = ob.contains("white_options") ? + getOptionsString(ob.value("white_options").toObject(), rndSeed) : + parameters["options"]; o.type(Order::Validation); o.parameters(parameters); diff --git a/autogtp/Order.cpp b/autogtp/Order.cpp index 874d892f6..044cecf8a 100644 --- a/autogtp/Order.cpp +++ b/autogtp/Order.cpp @@ -48,8 +48,8 @@ void Order::load(const QString &file) { QString key; for (int i = 0; i < count; i++) { in >> key; - if (key == "options") { - m_parameters[key] = in.readLine(); + if (key == "options" || key == "optionsSecond") { + m_parameters[key] = in.readLine(); } else { in >> m_parameters[key]; } From 39be654067127974c737769e68cf26f8c77ce40e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Forst=C3=A9n?= Date: Fri, 7 Dec 2018 14:18:44 +0200 Subject: [PATCH 08/45] Add O(sqrt(log(n))) scaling to tree search. Pull request #2072. --- src/GTP.cpp | 6 +++++- src/GTP.h | 2 ++ src/Leela.cpp | 8 ++++++++ src/UCTNode.cpp | 3 ++- 4 files changed, 17 insertions(+), 2 deletions(-) diff --git a/src/GTP.cpp b/src/GTP.cpp index 934a4a2c9..657f08a85 100644 --- a/src/GTP.cpp +++ b/src/GTP.cpp @@ -73,6 +73,8 @@ precision_t cfg_precision; #endif #endif float cfg_puct; +float cfg_logpuct; +float cfg_logconst; float cfg_softmax_temp; float cfg_fpu_reduction; float cfg_fpu_root_reduction; @@ -133,7 +135,9 @@ void GTP::setup_default_parameters() { cfg_precision = precision_t::AUTO; #endif #endif - cfg_puct = 0.8f; + cfg_puct = 0.5f; + cfg_logpuct = 0.015f; + cfg_logconst = 1.7f; cfg_softmax_temp = 1.0f; cfg_fpu_reduction = 0.25f; // see UCTSearch::should_resign diff --git a/src/GTP.h b/src/GTP.h index fabff889f..f956cfb22 100644 --- a/src/GTP.h +++ b/src/GTP.h @@ -59,6 +59,8 @@ extern precision_t cfg_precision; #endif #endif extern float cfg_puct; +extern float cfg_logpuct; +extern float cfg_logconst; extern float cfg_softmax_temp; extern float cfg_fpu_reduction; extern float cfg_fpu_root_reduction; diff --git 
a/src/Leela.cpp b/src/Leela.cpp index 637276beb..e2894bf55 100644 --- a/src/Leela.cpp +++ b/src/Leela.cpp @@ -117,6 +117,8 @@ static void parse_commandline(int argc, char *argv[]) { po::options_description tuner_desc("Tuning options"); tuner_desc.add_options() ("puct", po::value()) + ("logpuct", po::value()) + ("logconst", po::value()) ("softmax_temp", po::value()) ("fpu_reduction", po::value()) ; @@ -182,6 +184,12 @@ static void parse_commandline(int argc, char *argv[]) { if (vm.count("puct")) { cfg_puct = vm["puct"].as(); } + if (vm.count("logpuct")) { + cfg_logpuct = vm["logpuct"].as(); + } + if (vm.count("logconst")) { + cfg_logconst = vm["logconst"].as(); + } if (vm.count("softmax_temp")) { cfg_softmax_temp = vm["softmax_temp"].as(); } diff --git a/src/UCTNode.cpp b/src/UCTNode.cpp index 922ea3eac..fbb09f9e5 100644 --- a/src/UCTNode.cpp +++ b/src/UCTNode.cpp @@ -251,7 +251,8 @@ UCTNode* UCTNode::uct_select_child(int color, bool is_root) { } } - const auto numerator = std::sqrt(double(parentvisits)); + const auto numerator = std::sqrt(double(parentvisits) * + std::log(cfg_logpuct * double(parentvisits) + cfg_logconst)); const auto fpu_reduction = (is_root ? cfg_fpu_root_reduction : cfg_fpu_reduction) * std::sqrt(total_visited_policy); // Estimated eval for unknown nodes = original parent NN eval - reduction const auto fpu_eval = get_net_eval(color) - fpu_reduction; From 21e358042252548eb2059505b8f97f3a1142ea44 Mon Sep 17 00:00:00 2001 From: TFiFiE Date: Mon, 24 Dec 2018 10:53:44 +0100 Subject: [PATCH 09/45] Option to get network output without writing to cache. Pull request #2093. 
--- src/GTP.cpp | 8 ++++---- src/Network.cpp | 18 ++++++++++-------- src/Network.h | 3 ++- 3 files changed, 16 insertions(+), 13 deletions(-) diff --git a/src/GTP.cpp b/src/GTP.cpp index 657f08a85..a231deb19 100644 --- a/src/GTP.cpp +++ b/src/GTP.cpp @@ -660,20 +660,20 @@ void GTP::execute(GameState & game, const std::string& xinput) { // Default = DIRECT with no symmetric change vec = s_network->get_output( &game, Network::Ensemble::DIRECT, - Network::IDENTITY_SYMMETRY, true); + Network::IDENTITY_SYMMETRY, false); } else if (symmetry == "all") { for (auto s = 0; s < Network::NUM_SYMMETRIES; ++s) { vec = s_network->get_output( - &game, Network::Ensemble::DIRECT, s, true); + &game, Network::Ensemble::DIRECT, s, false); Network::show_heatmap(&game, vec, false); } } else if (symmetry == "average" || symmetry == "avg") { vec = s_network->get_output( &game, Network::Ensemble::AVERAGE, - Network::NUM_SYMMETRIES, true); + Network::NUM_SYMMETRIES, false); } else { vec = s_network->get_output( - &game, Network::Ensemble::DIRECT, std::stoi(symmetry), true); + &game, Network::Ensemble::DIRECT, std::stoi(symmetry), false); } if (symmetry != "all") { diff --git a/src/Network.cpp b/src/Network.cpp index 933784070..57ebdd763 100644 --- a/src/Network.cpp +++ b/src/Network.cpp @@ -94,14 +94,14 @@ float Network::benchmark_time(int centiseconds) { // As a sanity run, try one run with self check. 
// Isn't enough to guarantee correctness but better than nothing, // plus for large nets self-check takes a while (1~3 eval per second) - get_output(&state, Ensemble::RANDOM_SYMMETRY, -1, true, true); + get_output(&state, Ensemble::RANDOM_SYMMETRY, -1, false, true, true); const Time start; for (auto i = 0; i < cpus; i++) { tg.add_task([this, &runcount, start, centiseconds, state]() { while (true) { runcount++; - get_output(&state, Ensemble::RANDOM_SYMMETRY, -1, true); + get_output(&state, Ensemble::RANDOM_SYMMETRY, -1, false); const Time end; const auto elapsed = Time::timediff_centis(start, end); if (elapsed >= centiseconds) { @@ -128,7 +128,7 @@ void Network::benchmark(const GameState* const state, const int iterations) { tg.add_task([this, &runcount, iterations, state]() { while (runcount < iterations) { runcount++; - get_output(state, Ensemble::RANDOM_SYMMETRY, -1, true); + get_output(state, Ensemble::RANDOM_SYMMETRY, -1, false); } }); } @@ -715,14 +715,14 @@ bool Network::probe_cache(const GameState* const state, } Network::Netresult Network::get_output( - const GameState* const state, const Ensemble ensemble, - const int symmetry, const bool skip_cache, const bool force_selfcheck) { + const GameState* const state, const Ensemble ensemble, const int symmetry, + const bool read_cache, const bool write_cache, const bool force_selfcheck) { Netresult result; if (state->board.get_boardsize() != BOARD_SIZE) { return result; } - if (!skip_cache) { + if (read_cache) { // See if we already have this in the cache. if (probe_cache(state, result)) { return result; @@ -773,8 +773,10 @@ Network::Netresult Network::get_output( } } - // Insert result into cache. - m_nncache.insert(state->board.get_hash(), result); + if (write_cache) { + // Insert result into cache. 
+ m_nncache.insert(state->board.get_hash(), result); + } return result; } diff --git a/src/Network.h b/src/Network.h index 6ab5c08b8..3282143ab 100644 --- a/src/Network.h +++ b/src/Network.h @@ -66,7 +66,8 @@ class Network { Netresult get_output(const GameState* const state, const Ensemble ensemble, const int symmetry = -1, - const bool skip_cache = false, + const bool read_cache = true, + const bool write_cache = true, const bool force_selfcheck = false); static constexpr auto INPUT_MOVES = 8; From 808bb43df34654d357be2dd278eba19c03f07094 Mon Sep 17 00:00:00 2001 From: Gian-Carlo Pascutto Date: Fri, 4 Jan 2019 11:06:13 +0100 Subject: [PATCH 10/45] Add permission to link with NVIDIA libs. Update year. See issue #2032. All contributors to the core engine have given their permission to add an additional permission to link with NVIDIA's CUDA/cuDNN/TensorRT libraries. This makes it possible to distribute the engine when built to use those libraries. Update the copyright notices to 2019. --- CMakeLists.txt | 2 +- README.md | 13 ++++++++++++- src/CPUPipe.cpp | 13 ++++++++++++- src/CPUPipe.h | 13 ++++++++++++- src/FastBoard.cpp | 13 ++++++++++++- src/FastBoard.h | 13 ++++++++++++- src/FastState.cpp | 13 ++++++++++++- src/FastState.h | 13 ++++++++++++- src/ForwardPipe.h | 13 ++++++++++++- src/FullBoard.cpp | 13 ++++++++++++- src/FullBoard.h | 13 ++++++++++++- src/GTP.cpp | 13 ++++++++++++- src/GTP.h | 13 ++++++++++++- src/GameState.cpp | 13 ++++++++++++- src/GameState.h | 13 ++++++++++++- src/Im2Col.h | 13 ++++++++++++- src/KoState.cpp | 13 ++++++++++++- src/KoState.h | 13 ++++++++++++- src/Leela.cpp | 15 +++++++++++++-- src/NNCache.cpp | 13 ++++++++++++- src/NNCache.h | 13 ++++++++++++- src/Network.cpp | 14 ++++++++++++-- src/Network.h | 13 ++++++++++++- src/OpenCL.cpp | 13 ++++++++++++- src/OpenCL.h | 13 ++++++++++++- src/OpenCLScheduler.cpp | 13 ++++++++++++- src/OpenCLScheduler.h | 13 ++++++++++++- src/Random.cpp | 13 ++++++++++++- src/Random.h | 13 ++++++++++++- 
src/SGFParser.cpp | 13 ++++++++++++- src/SGFParser.h | 13 ++++++++++++- src/SGFTree.cpp | 13 ++++++++++++- src/SGFTree.h | 13 ++++++++++++- src/SMP.cpp | 13 ++++++++++++- src/SMP.h | 13 ++++++++++++- src/ThreadPool.h | 2 +- src/TimeControl.cpp | 13 ++++++++++++- src/TimeControl.h | 13 ++++++++++++- src/Timing.cpp | 13 ++++++++++++- src/Timing.h | 13 ++++++++++++- src/Training.cpp | 13 ++++++++++++- src/Training.h | 13 ++++++++++++- src/Tuner.cpp | 13 ++++++++++++- src/Tuner.h | 13 ++++++++++++- src/UCTNode.cpp | 13 ++++++++++++- src/UCTNode.h | 13 ++++++++++++- src/UCTNodePointer.cpp | 13 ++++++++++++- src/UCTNodePointer.h | 13 ++++++++++++- src/UCTNodeRoot.cpp | 13 ++++++++++++- src/UCTSearch.cpp | 13 ++++++++++++- src/UCTSearch.h | 13 ++++++++++++- src/Utils.cpp | 13 ++++++++++++- src/Utils.h | 13 ++++++++++++- src/Zobrist.cpp | 13 ++++++++++++- src/Zobrist.h | 13 ++++++++++++- src/config.h | 13 ++++++++++++- src/kernels/convolve1.opencl | 13 ++++++++++++- src/kernels/convolve3.opencl | 13 ++++++++++++- src/tests/gtests.cpp | 13 ++++++++++++- src/tests/utils_unittest.cpp | 13 ++++++++++++- 60 files changed, 699 insertions(+), 62 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 07dc864c9..617e4b2f3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,6 @@ # This file is part of Leela Zero. # Copyright (C) 2017 Marco Calignano -# Copyright (C) 2017-2018 Gian-Carlo Pascutto and contributors +# Copyright (C) 2017-2019 Gian-Carlo Pascutto and contributors # Leela Zero is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or diff --git a/README.md b/README.md index f2b567dc1..4d5d6a58e 100644 --- a/README.md +++ b/README.md @@ -319,7 +319,7 @@ If interrupted, training can be resumed with: - [ ] Further optimize Winograd transformations. - [ ] Implement GPU batching. 
-- [ ] GTP extention to exclude moves from analysis. +- [ ] GTP extension to exclude moves from analysis. - [ ] Root filtering for handicap play. - More backends: - [ ] MKL-DNN based backend. @@ -350,3 +350,14 @@ https://github.com/LeelaChessZero/lc0 # License The code is released under the GPLv3 or later, except for ThreadPool.h, cl2.hpp, half.hpp and the eigen and clblast_level3 subdirs, which have specific licenses (compatible with GPLv3) mentioned in those files. + +Additional permission under GNU GPL version 3 section 7 + +If you modify this Program, or any covered work, by linking or +combining it with NVIDIA Corporation's libraries from the +NVIDIA CUDA Toolkit and/or the NVIDIA CUDA Deep Neural +Network library and/or the NVIDIA TensorRT inference library +(or a modified version of those libraries), containing parts covered +by the terms of the respective license agreement, the licensors of +this Program grant you additional permission to convey the resulting +work. \ No newline at end of file diff --git a/src/CPUPipe.cpp b/src/CPUPipe.cpp index 4c163c32f..c898fe86b 100644 --- a/src/CPUPipe.cpp +++ b/src/CPUPipe.cpp @@ -1,6 +1,6 @@ /* This file is part of Leela Zero. - Copyright (C) 2017-2018 Gian-Carlo Pascutto and contributors + Copyright (C) 2017-2019 Gian-Carlo Pascutto and contributors Leela Zero is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -14,6 +14,17 @@ You should have received a copy of the GNU General Public License along with Leela Zero. If not, see . 
+ + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the + NVIDIA CUDA Toolkit and/or the NVIDIA CUDA Deep Neural + Network library and/or the NVIDIA TensorRT inference library + (or a modified version of those libraries), containing parts covered + by the terms of the respective license agreement, the licensors of + this Program grant you additional permission to convey the resulting + work. */ #include "config.h" diff --git a/src/CPUPipe.h b/src/CPUPipe.h index 6d389b3c5..8b0730498 100644 --- a/src/CPUPipe.h +++ b/src/CPUPipe.h @@ -1,6 +1,6 @@ /* This file is part of Leela Zero. - Copyright (C) 2018 Junhee Yoo and contributors + Copyright (C) 2018-2019 Junhee Yoo and contributors Leela Zero is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -14,6 +14,17 @@ You should have received a copy of the GNU General Public License along with Leela Zero. If not, see . + + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the + NVIDIA CUDA Toolkit and/or the NVIDIA CUDA Deep Neural + Network library and/or the NVIDIA TensorRT inference library + (or a modified version of those libraries), containing parts covered + by the terms of the respective license agreement, the licensors of + this Program grant you additional permission to convey the resulting + work. */ #ifndef CPUPIPE_H_INCLUDED diff --git a/src/FastBoard.cpp b/src/FastBoard.cpp index 3828d8601..4b93feab2 100644 --- a/src/FastBoard.cpp +++ b/src/FastBoard.cpp @@ -1,6 +1,6 @@ /* This file is part of Leela Zero. 
- Copyright (C) 2017-2018 Gian-Carlo Pascutto and contributors + Copyright (C) 2017-2019 Gian-Carlo Pascutto and contributors Leela Zero is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -14,6 +14,17 @@ You should have received a copy of the GNU General Public License along with Leela Zero. If not, see . + + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the + NVIDIA CUDA Toolkit and/or the NVIDIA CUDA Deep Neural + Network library and/or the NVIDIA TensorRT inference library + (or a modified version of those libraries), containing parts covered + by the terms of the respective license agreement, the licensors of + this Program grant you additional permission to convey the resulting + work. */ #include "FastBoard.h" diff --git a/src/FastBoard.h b/src/FastBoard.h index 454fa65b4..5f82a3be6 100644 --- a/src/FastBoard.h +++ b/src/FastBoard.h @@ -1,6 +1,6 @@ /* This file is part of Leela Zero. - Copyright (C) 2017-2018 Gian-Carlo Pascutto and contributors + Copyright (C) 2017-2019 Gian-Carlo Pascutto and contributors Leela Zero is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -14,6 +14,17 @@ You should have received a copy of the GNU General Public License along with Leela Zero. If not, see . 
+ + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the + NVIDIA CUDA Toolkit and/or the NVIDIA CUDA Deep Neural + Network library and/or the NVIDIA TensorRT inference library + (or a modified version of those libraries), containing parts covered + by the terms of the respective license agreement, the licensors of + this Program grant you additional permission to convey the resulting + work. */ #ifndef FASTBOARD_H_INCLUDED diff --git a/src/FastState.cpp b/src/FastState.cpp index a22a3cb21..ef4850d32 100644 --- a/src/FastState.cpp +++ b/src/FastState.cpp @@ -1,6 +1,6 @@ /* This file is part of Leela Zero. - Copyright (C) 2017-2018 Gian-Carlo Pascutto and contributors + Copyright (C) 2017-2019 Gian-Carlo Pascutto and contributors Leela Zero is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -14,6 +14,17 @@ You should have received a copy of the GNU General Public License along with Leela Zero. If not, see . + + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the + NVIDIA CUDA Toolkit and/or the NVIDIA CUDA Deep Neural + Network library and/or the NVIDIA TensorRT inference library + (or a modified version of those libraries), containing parts covered + by the terms of the respective license agreement, the licensors of + this Program grant you additional permission to convey the resulting + work. */ #include "config.h" diff --git a/src/FastState.h b/src/FastState.h index 880465068..369985e7a 100644 --- a/src/FastState.h +++ b/src/FastState.h @@ -1,6 +1,6 @@ /* This file is part of Leela Zero. 
- Copyright (C) 2017-2018 Gian-Carlo Pascutto and contributors + Copyright (C) 2017-2019 Gian-Carlo Pascutto and contributors Leela Zero is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -14,6 +14,17 @@ You should have received a copy of the GNU General Public License along with Leela Zero. If not, see . + + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the + NVIDIA CUDA Toolkit and/or the NVIDIA CUDA Deep Neural + Network library and/or the NVIDIA TensorRT inference library + (or a modified version of those libraries), containing parts covered + by the terms of the respective license agreement, the licensors of + this Program grant you additional permission to convey the resulting + work. */ #ifndef FASTSTATE_H_INCLUDED diff --git a/src/ForwardPipe.h b/src/ForwardPipe.h index 0f155cb20..fcba8861c 100644 --- a/src/ForwardPipe.h +++ b/src/ForwardPipe.h @@ -1,6 +1,6 @@ /* This file is part of Leela Zero. - Copyright (C) 2018 Junhee Yoo and contributors + Copyright (C) 2018-2019 Junhee Yoo and contributors Leela Zero is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -14,6 +14,17 @@ You should have received a copy of the GNU General Public License along with Leela Zero. If not, see . 
+ + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the + NVIDIA CUDA Toolkit and/or the NVIDIA CUDA Deep Neural + Network library and/or the NVIDIA TensorRT inference library + (or a modified version of those libraries), containing parts covered + by the terms of the respective license agreement, the licensors of + this Program grant you additional permission to convey the resulting + work. */ #ifndef FORWARDPIPE_H_INCLUDED diff --git a/src/FullBoard.cpp b/src/FullBoard.cpp index f9acec469..ec4ff653e 100644 --- a/src/FullBoard.cpp +++ b/src/FullBoard.cpp @@ -1,6 +1,6 @@ /* This file is part of Leela Zero. - Copyright (C) 2017-2018 Gian-Carlo Pascutto and contributors + Copyright (C) 2017-2019 Gian-Carlo Pascutto and contributors Leela Zero is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -14,6 +14,17 @@ You should have received a copy of the GNU General Public License along with Leela Zero. If not, see . + + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the + NVIDIA CUDA Toolkit and/or the NVIDIA CUDA Deep Neural + Network library and/or the NVIDIA TensorRT inference library + (or a modified version of those libraries), containing parts covered + by the terms of the respective license agreement, the licensors of + this Program grant you additional permission to convey the resulting + work. */ #include "config.h" diff --git a/src/FullBoard.h b/src/FullBoard.h index fb8bd5d3b..038057f26 100644 --- a/src/FullBoard.h +++ b/src/FullBoard.h @@ -1,6 +1,6 @@ /* This file is part of Leela Zero. 
- Copyright (C) 2017-2018 Gian-Carlo Pascutto and contributors + Copyright (C) 2017-2019 Gian-Carlo Pascutto and contributors Leela Zero is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -14,6 +14,17 @@ You should have received a copy of the GNU General Public License along with Leela Zero. If not, see . + + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the + NVIDIA CUDA Toolkit and/or the NVIDIA CUDA Deep Neural + Network library and/or the NVIDIA TensorRT inference library + (or a modified version of those libraries), containing parts covered + by the terms of the respective license agreement, the licensors of + this Program grant you additional permission to convey the resulting + work. */ #ifndef FULLBOARD_H_INCLUDED diff --git a/src/GTP.cpp b/src/GTP.cpp index a231deb19..4b1af324f 100644 --- a/src/GTP.cpp +++ b/src/GTP.cpp @@ -1,6 +1,6 @@ /* This file is part of Leela Zero. - Copyright (C) 2017-2018 Gian-Carlo Pascutto and contributors + Copyright (C) 2017-2019 Gian-Carlo Pascutto and contributors Leela Zero is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -14,6 +14,17 @@ You should have received a copy of the GNU General Public License along with Leela Zero. If not, see . 
+ + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the + NVIDIA CUDA Toolkit and/or the NVIDIA CUDA Deep Neural + Network library and/or the NVIDIA TensorRT inference library + (or a modified version of those libraries), containing parts covered + by the terms of the respective license agreement, the licensors of + this Program grant you additional permission to convey the resulting + work. */ #include "config.h" diff --git a/src/GTP.h b/src/GTP.h index f956cfb22..1cf4c6706 100644 --- a/src/GTP.h +++ b/src/GTP.h @@ -1,6 +1,6 @@ /* This file is part of Leela Zero. - Copyright (C) 2017-2018 Gian-Carlo Pascutto and contributors + Copyright (C) 2017-2019 Gian-Carlo Pascutto and contributors Leela Zero is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -14,6 +14,17 @@ You should have received a copy of the GNU General Public License along with Leela Zero. If not, see . + + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the + NVIDIA CUDA Toolkit and/or the NVIDIA CUDA Deep Neural + Network library and/or the NVIDIA TensorRT inference library + (or a modified version of those libraries), containing parts covered + by the terms of the respective license agreement, the licensors of + this Program grant you additional permission to convey the resulting + work. */ #ifndef GTP_H_INCLUDED diff --git a/src/GameState.cpp b/src/GameState.cpp index f4f5c9715..cfd01bb87 100644 --- a/src/GameState.cpp +++ b/src/GameState.cpp @@ -1,6 +1,6 @@ /* This file is part of Leela Zero. 
- Copyright (C) 2017-2018 Gian-Carlo Pascutto and contributors + Copyright (C) 2017-2019 Gian-Carlo Pascutto and contributors Leela Zero is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -14,6 +14,17 @@ You should have received a copy of the GNU General Public License along with Leela Zero. If not, see . + + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the + NVIDIA CUDA Toolkit and/or the NVIDIA CUDA Deep Neural + Network library and/or the NVIDIA TensorRT inference library + (or a modified version of those libraries), containing parts covered + by the terms of the respective license agreement, the licensors of + this Program grant you additional permission to convey the resulting + work. */ #include "GameState.h" diff --git a/src/GameState.h b/src/GameState.h index 988655f04..050adf03e 100644 --- a/src/GameState.h +++ b/src/GameState.h @@ -1,6 +1,6 @@ /* This file is part of Leela Zero. - Copyright (C) 2017-2018 Gian-Carlo Pascutto and contributors + Copyright (C) 2017-2019 Gian-Carlo Pascutto and contributors Leela Zero is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -14,6 +14,17 @@ You should have received a copy of the GNU General Public License along with Leela Zero. If not, see . 
+ + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the + NVIDIA CUDA Toolkit and/or the NVIDIA CUDA Deep Neural + Network library and/or the NVIDIA TensorRT inference library + (or a modified version of those libraries), containing parts covered + by the terms of the respective license agreement, the licensors of + this Program grant you additional permission to convey the resulting + work. */ #ifndef GAMESTATE_H_INCLUDED diff --git a/src/Im2Col.h b/src/Im2Col.h index a79f90d24..2afc13e4d 100644 --- a/src/Im2Col.h +++ b/src/Im2Col.h @@ -1,6 +1,6 @@ /* This file is part of Leela Zero. - Copyright (C) 2017-2018 Gian-Carlo Pascutto and contributors + Copyright (C) 2017-2019 Gian-Carlo Pascutto and contributors Leela Zero is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -14,6 +14,17 @@ You should have received a copy of the GNU General Public License along with Leela Zero. If not, see . + + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the + NVIDIA CUDA Toolkit and/or the NVIDIA CUDA Deep Neural + Network library and/or the NVIDIA TensorRT inference library + (or a modified version of those libraries), containing parts covered + by the terms of the respective license agreement, the licensors of + this Program grant you additional permission to convey the resulting + work. */ #ifndef IM2COL_H_INCLUDED diff --git a/src/KoState.cpp b/src/KoState.cpp index 0f65952f9..ea1c51a9f 100644 --- a/src/KoState.cpp +++ b/src/KoState.cpp @@ -1,6 +1,6 @@ /* This file is part of Leela Zero. 
- Copyright (C) 2017-2018 Gian-Carlo Pascutto and contributors + Copyright (C) 2017-2019 Gian-Carlo Pascutto and contributors Leela Zero is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -14,6 +14,17 @@ You should have received a copy of the GNU General Public License along with Leela Zero. If not, see . + + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the + NVIDIA CUDA Toolkit and/or the NVIDIA CUDA Deep Neural + Network library and/or the NVIDIA TensorRT inference library + (or a modified version of those libraries), containing parts covered + by the terms of the respective license agreement, the licensors of + this Program grant you additional permission to convey the resulting + work. */ #include "config.h" diff --git a/src/KoState.h b/src/KoState.h index 2a9ba050c..1f6805af1 100644 --- a/src/KoState.h +++ b/src/KoState.h @@ -1,6 +1,6 @@ /* This file is part of Leela Zero. - Copyright (C) 2017-2018 Gian-Carlo Pascutto and contributors + Copyright (C) 2017-2019 Gian-Carlo Pascutto and contributors Leela Zero is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -14,6 +14,17 @@ You should have received a copy of the GNU General Public License along with Leela Zero. If not, see . 
+ + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the + NVIDIA CUDA Toolkit and/or the NVIDIA CUDA Deep Neural + Network library and/or the NVIDIA TensorRT inference library + (or a modified version of those libraries), containing parts covered + by the terms of the respective license agreement, the licensors of + this Program grant you additional permission to convey the resulting + work. */ #ifndef KOSTATE_H_INCLUDED diff --git a/src/Leela.cpp b/src/Leela.cpp index e2894bf55..a2456a1a1 100644 --- a/src/Leela.cpp +++ b/src/Leela.cpp @@ -1,6 +1,6 @@ /* This file is part of Leela Zero. - Copyright (C) 2017-2018 Gian-Carlo Pascutto and contributors + Copyright (C) 2017-2019 Gian-Carlo Pascutto and contributors Leela Zero is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -14,6 +14,17 @@ You should have received a copy of the GNU General Public License along with Leela Zero. If not, see . + + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the + NVIDIA CUDA Toolkit and/or the NVIDIA CUDA Deep Neural + Network library and/or the NVIDIA TensorRT inference library + (or a modified version of those libraries), containing parts covered + by the terms of the respective license agreement, the licensors of + this Program grant you additional permission to convey the resulting + work. 
*/ #include "config.h" @@ -43,7 +54,7 @@ using namespace Utils; static void license_blurb() { printf( - "Leela Zero %s Copyright (C) 2017-2018 Gian-Carlo Pascutto and contributors\n" + "Leela Zero %s Copyright (C) 2017-2019 Gian-Carlo Pascutto and contributors\n" "This program comes with ABSOLUTELY NO WARRANTY.\n" "This is free software, and you are welcome to redistribute it\n" "under certain conditions; see the COPYING file for details.\n\n", diff --git a/src/NNCache.cpp b/src/NNCache.cpp index ff52dc8f9..739e2793c 100644 --- a/src/NNCache.cpp +++ b/src/NNCache.cpp @@ -1,6 +1,6 @@ /* This file is part of Leela Zero. - Copyright (C) 2017-2018 Michael O and contributors + Copyright (C) 2017-2019 Michael O and contributors Leela Zero is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -14,6 +14,17 @@ You should have received a copy of the GNU General Public License along with Leela Zero. If not, see . + + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the + NVIDIA CUDA Toolkit and/or the NVIDIA CUDA Deep Neural + Network library and/or the NVIDIA TensorRT inference library + (or a modified version of those libraries), containing parts covered + by the terms of the respective license agreement, the licensors of + this Program grant you additional permission to convey the resulting + work. */ #include "config.h" diff --git a/src/NNCache.h b/src/NNCache.h index 14d9a80f0..58632673c 100644 --- a/src/NNCache.h +++ b/src/NNCache.h @@ -1,6 +1,6 @@ /* This file is part of Leela Zero. 
- Copyright (C) 2017-2018 Michael O and contributors + Copyright (C) 2017-2019 Michael O and contributors Leela Zero is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -14,6 +14,17 @@ You should have received a copy of the GNU General Public License along with Leela Zero. If not, see . + + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the + NVIDIA CUDA Toolkit and/or the NVIDIA CUDA Deep Neural + Network library and/or the NVIDIA TensorRT inference library + (or a modified version of those libraries), containing parts covered + by the terms of the respective license agreement, the licensors of + this Program grant you additional permission to convey the resulting + work. */ #ifndef NNCACHE_H_INCLUDED diff --git a/src/Network.cpp b/src/Network.cpp index 57ebdd763..f8787391a 100644 --- a/src/Network.cpp +++ b/src/Network.cpp @@ -1,6 +1,6 @@ /* This file is part of Leela Zero. - Copyright (C) 2017-2018 Gian-Carlo Pascutto and contributors + Copyright (C) 2017-2019 Gian-Carlo Pascutto and contributors Leela Zero is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -14,8 +14,18 @@ You should have received a copy of the GNU General Public License along with Leela Zero. If not, see . 
-*/ + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the + NVIDIA CUDA Toolkit and/or the NVIDIA CUDA Deep Neural + Network library and/or the NVIDIA TensorRT inference library + (or a modified version of those libraries), containing parts covered + by the terms of the respective license agreement, the licensors of + this Program grant you additional permission to convey the resulting + work. +*/ #include "config.h" diff --git a/src/Network.h b/src/Network.h index 3282143ab..6ff1fb67e 100644 --- a/src/Network.h +++ b/src/Network.h @@ -1,6 +1,6 @@ /* This file is part of Leela Zero. - Copyright (C) 2017-2018 Gian-Carlo Pascutto and contributors + Copyright (C) 2017-2019 Gian-Carlo Pascutto and contributors Leela Zero is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -14,6 +14,17 @@ You should have received a copy of the GNU General Public License along with Leela Zero. If not, see . + + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the + NVIDIA CUDA Toolkit and/or the NVIDIA CUDA Deep Neural + Network library and/or the NVIDIA TensorRT inference library + (or a modified version of those libraries), containing parts covered + by the terms of the respective license agreement, the licensors of + this Program grant you additional permission to convey the resulting + work. */ #ifndef NETWORK_H_INCLUDED diff --git a/src/OpenCL.cpp b/src/OpenCL.cpp index 0d4abe04f..23c2bfd3d 100644 --- a/src/OpenCL.cpp +++ b/src/OpenCL.cpp @@ -1,6 +1,6 @@ /* This file is part of Leela Zero. 
- Copyright (C) 2017-2018 Gian-Carlo Pascutto and contributors + Copyright (C) 2017-2019 Gian-Carlo Pascutto and contributors Leela Zero is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -14,6 +14,17 @@ You should have received a copy of the GNU General Public License along with Leela Zero. If not, see . + + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the + NVIDIA CUDA Toolkit and/or the NVIDIA CUDA Deep Neural + Network library and/or the NVIDIA TensorRT inference library + (or a modified version of those libraries), containing parts covered + by the terms of the respective license agreement, the licensors of + this Program grant you additional permission to convey the resulting + work. */ #include "config.h" diff --git a/src/OpenCL.h b/src/OpenCL.h index 57793fa65..5c86f06bf 100644 --- a/src/OpenCL.h +++ b/src/OpenCL.h @@ -1,6 +1,6 @@ /* This file is part of Leela Zero. - Copyright (C) 2017-2018 Gian-Carlo Pascutto and contributors + Copyright (C) 2017-2019 Gian-Carlo Pascutto and contributors Leela Zero is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -14,6 +14,17 @@ You should have received a copy of the GNU General Public License along with Leela Zero. If not, see . 
+ + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the + NVIDIA CUDA Toolkit and/or the NVIDIA CUDA Deep Neural + Network library and/or the NVIDIA TensorRT inference library + (or a modified version of those libraries), containing parts covered + by the terms of the respective license agreement, the licensors of + this Program grant you additional permission to convey the resulting + work. */ #ifndef OPENCL_H_INCLUDED diff --git a/src/OpenCLScheduler.cpp b/src/OpenCLScheduler.cpp index 09f1eb691..96c8b9349 100644 --- a/src/OpenCLScheduler.cpp +++ b/src/OpenCLScheduler.cpp @@ -1,6 +1,6 @@ /* This file is part of Leela Zero. - Copyright (C) 2018 Junhee Yoo and contributors + Copyright (C) 2018-2019 Junhee Yoo and contributors Leela Zero is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -14,6 +14,17 @@ You should have received a copy of the GNU General Public License along with Leela Zero. If not, see . + + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the + NVIDIA CUDA Toolkit and/or the NVIDIA CUDA Deep Neural + Network library and/or the NVIDIA TensorRT inference library + (or a modified version of those libraries), containing parts covered + by the terms of the respective license agreement, the licensors of + this Program grant you additional permission to convey the resulting + work. */ #include "config.h" diff --git a/src/OpenCLScheduler.h b/src/OpenCLScheduler.h index 3458a5f00..329f25375 100644 --- a/src/OpenCLScheduler.h +++ b/src/OpenCLScheduler.h @@ -1,6 +1,6 @@ /* This file is part of Leela Zero. 
- Copyright (C) 2018 Junhee Yoo and contributors + Copyright (C) 2018-2019 Junhee Yoo and contributors Leela Zero is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -14,6 +14,17 @@ You should have received a copy of the GNU General Public License along with Leela Zero. If not, see . + + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the + NVIDIA CUDA Toolkit and/or the NVIDIA CUDA Deep Neural + Network library and/or the NVIDIA TensorRT inference library + (or a modified version of those libraries), containing parts covered + by the terms of the respective license agreement, the licensors of + this Program grant you additional permission to convey the resulting + work. */ #ifndef OPENCLSCHEDULER_H_INCLUDED diff --git a/src/Random.cpp b/src/Random.cpp index 7e888ad9b..2726322ec 100644 --- a/src/Random.cpp +++ b/src/Random.cpp @@ -1,6 +1,6 @@ /* This file is part of Leela Zero. - Copyright (C) 2017-2018 Gian-Carlo Pascutto and contributors + Copyright (C) 2017-2019 Gian-Carlo Pascutto and contributors Leela Zero is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -14,6 +14,17 @@ You should have received a copy of the GNU General Public License along with Leela Zero. If not, see . 
+ + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the + NVIDIA CUDA Toolkit and/or the NVIDIA CUDA Deep Neural + Network library and/or the NVIDIA TensorRT inference library + (or a modified version of those libraries), containing parts covered + by the terms of the respective license agreement, the licensors of + this Program grant you additional permission to convey the resulting + work. */ #include "config.h" diff --git a/src/Random.h b/src/Random.h index c84ac7b45..d60fe4e5f 100644 --- a/src/Random.h +++ b/src/Random.h @@ -1,6 +1,6 @@ /* This file is part of Leela Zero. - Copyright (C) 2017-2018 Gian-Carlo Pascutto and contributors + Copyright (C) 2017-2019 Gian-Carlo Pascutto and contributors Leela Zero is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -14,6 +14,17 @@ You should have received a copy of the GNU General Public License along with Leela Zero. If not, see . + + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the + NVIDIA CUDA Toolkit and/or the NVIDIA CUDA Deep Neural + Network library and/or the NVIDIA TensorRT inference library + (or a modified version of those libraries), containing parts covered + by the terms of the respective license agreement, the licensors of + this Program grant you additional permission to convey the resulting + work. */ #ifndef RANDOM_H_INCLUDED diff --git a/src/SGFParser.cpp b/src/SGFParser.cpp index 508d1fa6b..144173496 100644 --- a/src/SGFParser.cpp +++ b/src/SGFParser.cpp @@ -1,6 +1,6 @@ /* This file is part of Leela Zero. 
- Copyright (C) 2017-2018 Gian-Carlo Pascutto and contributors + Copyright (C) 2017-2019 Gian-Carlo Pascutto and contributors Leela Zero is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -14,6 +14,17 @@ You should have received a copy of the GNU General Public License along with Leela Zero. If not, see . + + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the + NVIDIA CUDA Toolkit and/or the NVIDIA CUDA Deep Neural + Network library and/or the NVIDIA TensorRT inference library + (or a modified version of those libraries), containing parts covered + by the terms of the respective license agreement, the licensors of + this Program grant you additional permission to convey the resulting + work. */ #include "SGFParser.h" diff --git a/src/SGFParser.h b/src/SGFParser.h index 04132322f..4151a29e6 100644 --- a/src/SGFParser.h +++ b/src/SGFParser.h @@ -1,6 +1,6 @@ /* This file is part of Leela Zero. - Copyright (C) 2017-2018 Gian-Carlo Pascutto and contributors + Copyright (C) 2017-2019 Gian-Carlo Pascutto and contributors Leela Zero is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -14,6 +14,17 @@ You should have received a copy of the GNU General Public License along with Leela Zero. If not, see . 
+ + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the + NVIDIA CUDA Toolkit and/or the NVIDIA CUDA Deep Neural + Network library and/or the NVIDIA TensorRT inference library + (or a modified version of those libraries), containing parts covered + by the terms of the respective license agreement, the licensors of + this Program grant you additional permission to convey the resulting + work. */ #ifndef SGFPARSER_H_INCLUDED diff --git a/src/SGFTree.cpp b/src/SGFTree.cpp index d958a7508..811d892cc 100644 --- a/src/SGFTree.cpp +++ b/src/SGFTree.cpp @@ -1,6 +1,6 @@ /* This file is part of Leela Zero. - Copyright (C) 2017-2018 Gian-Carlo Pascutto and contributors + Copyright (C) 2017-2019 Gian-Carlo Pascutto and contributors Leela Zero is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -14,6 +14,17 @@ You should have received a copy of the GNU General Public License along with Leela Zero. If not, see . + + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the + NVIDIA CUDA Toolkit and/or the NVIDIA CUDA Deep Neural + Network library and/or the NVIDIA TensorRT inference library + (or a modified version of those libraries), containing parts covered + by the terms of the respective license agreement, the licensors of + this Program grant you additional permission to convey the resulting + work. */ #include "config.h" diff --git a/src/SGFTree.h b/src/SGFTree.h index 59718e05e..740e7f019 100644 --- a/src/SGFTree.h +++ b/src/SGFTree.h @@ -1,6 +1,6 @@ /* This file is part of Leela Zero. 
- Copyright (C) 2017-2018 Gian-Carlo Pascutto and contributors + Copyright (C) 2017-2019 Gian-Carlo Pascutto and contributors Leela Zero is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -14,6 +14,17 @@ You should have received a copy of the GNU General Public License along with Leela Zero. If not, see . + + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the + NVIDIA CUDA Toolkit and/or the NVIDIA CUDA Deep Neural + Network library and/or the NVIDIA TensorRT inference library + (or a modified version of those libraries), containing parts covered + by the terms of the respective license agreement, the licensors of + this Program grant you additional permission to convey the resulting + work. */ #ifndef SGFTREE_H_INCLUDED diff --git a/src/SMP.cpp b/src/SMP.cpp index 7e387c0ad..a42f09769 100644 --- a/src/SMP.cpp +++ b/src/SMP.cpp @@ -1,6 +1,6 @@ /* This file is part of Leela Zero. - Copyright (C) 2017-2018 Gian-Carlo Pascutto and contributors + Copyright (C) 2017-2019 Gian-Carlo Pascutto and contributors Leela Zero is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -14,6 +14,17 @@ You should have received a copy of the GNU General Public License along with Leela Zero. If not, see . 
+ + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the + NVIDIA CUDA Toolkit and/or the NVIDIA CUDA Deep Neural + Network library and/or the NVIDIA TensorRT inference library + (or a modified version of those libraries), containing parts covered + by the terms of the respective license agreement, the licensors of + this Program grant you additional permission to convey the resulting + work. */ #include "SMP.h" diff --git a/src/SMP.h b/src/SMP.h index fd16e53fe..6c110eb28 100644 --- a/src/SMP.h +++ b/src/SMP.h @@ -1,6 +1,6 @@ /* This file is part of Leela Zero. - Copyright (C) 2017-2018 Gian-Carlo Pascutto and contributors + Copyright (C) 2017-2019 Gian-Carlo Pascutto and contributors Leela Zero is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -14,6 +14,17 @@ You should have received a copy of the GNU General Public License along with Leela Zero. If not, see . + + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the + NVIDIA CUDA Toolkit and/or the NVIDIA CUDA Deep Neural + Network library and/or the NVIDIA TensorRT inference library + (or a modified version of those libraries), containing parts covered + by the terms of the respective license agreement, the licensors of + this Program grant you additional permission to convey the resulting + work. 
*/ #ifndef SMP_H_INCLUDED diff --git a/src/ThreadPool.h b/src/ThreadPool.h index 250abd6e6..4c245fa86 100644 --- a/src/ThreadPool.h +++ b/src/ThreadPool.h @@ -4,7 +4,7 @@ Extended from code: Copyright (c) 2012 Jakob Progsch, Václav Zeman Modifications: - Copyright (c) 2017-2018 Gian-Carlo Pascutto and contributors + Copyright (c) 2017-2019 Gian-Carlo Pascutto and contributors This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages diff --git a/src/TimeControl.cpp b/src/TimeControl.cpp index 2ca58b8c7..5660e97cc 100644 --- a/src/TimeControl.cpp +++ b/src/TimeControl.cpp @@ -1,6 +1,6 @@ /* This file is part of Leela Zero. - Copyright (C) 2017-2018 Gian-Carlo Pascutto and contributors + Copyright (C) 2017-2019 Gian-Carlo Pascutto and contributors Leela Zero is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -14,6 +14,17 @@ You should have received a copy of the GNU General Public License along with Leela Zero. If not, see . + + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the + NVIDIA CUDA Toolkit and/or the NVIDIA CUDA Deep Neural + Network library and/or the NVIDIA TensorRT inference library + (or a modified version of those libraries), containing parts covered + by the terms of the respective license agreement, the licensors of + this Program grant you additional permission to convey the resulting + work. */ #include "TimeControl.h" diff --git a/src/TimeControl.h b/src/TimeControl.h index 9af38017b..896659ff3 100644 --- a/src/TimeControl.h +++ b/src/TimeControl.h @@ -1,6 +1,6 @@ /* This file is part of Leela Zero. 
- Copyright (C) 2017-2018 Gian-Carlo Pascutto and contributors + Copyright (C) 2017-2019 Gian-Carlo Pascutto and contributors Leela Zero is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -14,6 +14,17 @@ You should have received a copy of the GNU General Public License along with Leela Zero. If not, see . + + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the + NVIDIA CUDA Toolkit and/or the NVIDIA CUDA Deep Neural + Network library and/or the NVIDIA TensorRT inference library + (or a modified version of those libraries), containing parts covered + by the terms of the respective license agreement, the licensors of + this Program grant you additional permission to convey the resulting + work. */ #ifndef TIMECONTROL_H_INCLUDED diff --git a/src/Timing.cpp b/src/Timing.cpp index 907d56ce6..1da648d15 100644 --- a/src/Timing.cpp +++ b/src/Timing.cpp @@ -1,6 +1,6 @@ /* This file is part of Leela Zero. - Copyright (C) 2017-2018 Gian-Carlo Pascutto and contributors + Copyright (C) 2017-2019 Gian-Carlo Pascutto and contributors Leela Zero is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -14,6 +14,17 @@ You should have received a copy of the GNU General Public License along with Leela Zero. If not, see . 
+ + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the + NVIDIA CUDA Toolkit and/or the NVIDIA CUDA Deep Neural + Network library and/or the NVIDIA TensorRT inference library + (or a modified version of those libraries), containing parts covered + by the terms of the respective license agreement, the licensors of + this Program grant you additional permission to convey the resulting + work. */ #include "Timing.h" diff --git a/src/Timing.h b/src/Timing.h index e5b223f4c..badc2926f 100644 --- a/src/Timing.h +++ b/src/Timing.h @@ -1,6 +1,6 @@ /* This file is part of Leela Zero. - Copyright (C) 2017-2018 Gian-Carlo Pascutto and contributors + Copyright (C) 2017-2019 Gian-Carlo Pascutto and contributors Leela Zero is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -14,6 +14,17 @@ You should have received a copy of the GNU General Public License along with Leela Zero. If not, see . + + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the + NVIDIA CUDA Toolkit and/or the NVIDIA CUDA Deep Neural + Network library and/or the NVIDIA TensorRT inference library + (or a modified version of those libraries), containing parts covered + by the terms of the respective license agreement, the licensors of + this Program grant you additional permission to convey the resulting + work. */ #ifndef TIMING_H_INCLUDED diff --git a/src/Training.cpp b/src/Training.cpp index f0a107185..9228ae6e4 100644 --- a/src/Training.cpp +++ b/src/Training.cpp @@ -1,6 +1,6 @@ /* This file is part of Leela Zero. 
- Copyright (C) 2017-2018 Gian-Carlo Pascutto and contributors + Copyright (C) 2017-2019 Gian-Carlo Pascutto and contributors Leela Zero is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -14,6 +14,17 @@ You should have received a copy of the GNU General Public License along with Leela Zero. If not, see . + + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the + NVIDIA CUDA Toolkit and/or the NVIDIA CUDA Deep Neural + Network library and/or the NVIDIA TensorRT inference library + (or a modified version of those libraries), containing parts covered + by the terms of the respective license agreement, the licensors of + this Program grant you additional permission to convey the resulting + work. */ #include "Training.h" diff --git a/src/Training.h b/src/Training.h index 142cd2018..3cbf9dd16 100644 --- a/src/Training.h +++ b/src/Training.h @@ -1,6 +1,6 @@ /* This file is part of Leela Zero. - Copyright (C) 2017-2018 Gian-Carlo Pascutto and contributors + Copyright (C) 2017-2019 Gian-Carlo Pascutto and contributors Leela Zero is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -14,6 +14,17 @@ You should have received a copy of the GNU General Public License along with Leela Zero. If not, see . 
+ + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the + NVIDIA CUDA Toolkit and/or the NVIDIA CUDA Deep Neural + Network library and/or the NVIDIA TensorRT inference library + (or a modified version of those libraries), containing parts covered + by the terms of the respective license agreement, the licensors of + this Program grant you additional permission to convey the resulting + work. */ #ifndef TRAINING_H_INCLUDED diff --git a/src/Tuner.cpp b/src/Tuner.cpp index 4fe23bc9e..5fee22cf3 100644 --- a/src/Tuner.cpp +++ b/src/Tuner.cpp @@ -1,6 +1,6 @@ /* This file is part of Leela Zero. - Copyright (C) 2017-2018 Gian-Carlo Pascutto and contributors + Copyright (C) 2017-2019 Gian-Carlo Pascutto and contributors Leela Zero is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -14,6 +14,17 @@ You should have received a copy of the GNU General Public License along with Leela Zero. If not, see . + + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the + NVIDIA CUDA Toolkit and/or the NVIDIA CUDA Deep Neural + Network library and/or the NVIDIA TensorRT inference library + (or a modified version of those libraries), containing parts covered + by the terms of the respective license agreement, the licensors of + this Program grant you additional permission to convey the resulting + work. */ #include "config.h" diff --git a/src/Tuner.h b/src/Tuner.h index 579c677d4..30e230fe5 100644 --- a/src/Tuner.h +++ b/src/Tuner.h @@ -1,6 +1,6 @@ /* This file is part of Leela Zero. 
- Copyright (C) 2017-2018 Gian-Carlo Pascutto and contributors + Copyright (C) 2017-2019 Gian-Carlo Pascutto and contributors Leela Zero is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -14,6 +14,17 @@ You should have received a copy of the GNU General Public License along with Leela Zero. If not, see . + + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the + NVIDIA CUDA Toolkit and/or the NVIDIA CUDA Deep Neural + Network library and/or the NVIDIA TensorRT inference library + (or a modified version of those libraries), containing parts covered + by the terms of the respective license agreement, the licensors of + this Program grant you additional permission to convey the resulting + work. */ #ifndef SGEMM_TUNER_H_INCLUDED diff --git a/src/UCTNode.cpp b/src/UCTNode.cpp index fbb09f9e5..fb7729d2e 100644 --- a/src/UCTNode.cpp +++ b/src/UCTNode.cpp @@ -1,6 +1,6 @@ /* This file is part of Leela Zero. - Copyright (C) 2017-2018 Gian-Carlo Pascutto + Copyright (C) 2017-2019 Gian-Carlo Pascutto Leela Zero is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -14,6 +14,17 @@ You should have received a copy of the GNU General Public License along with Leela Zero. If not, see . + + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the + NVIDIA CUDA Toolkit and/or the NVIDIA CUDA Deep Neural + Network library and/or the NVIDIA TensorRT inference library + (or a modified version of those libraries), containing parts covered + by the terms of the respective license agreement, the licensors of + this Program grant you additional permission to convey the resulting + work. 
*/ #include "config.h" diff --git a/src/UCTNode.h b/src/UCTNode.h index 309492e7f..ba08b2ef9 100644 --- a/src/UCTNode.h +++ b/src/UCTNode.h @@ -1,6 +1,6 @@ /* This file is part of Leela Zero. - Copyright (C) 2017-2018 Gian-Carlo Pascutto and contributors + Copyright (C) 2017-2019 Gian-Carlo Pascutto and contributors Leela Zero is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -14,6 +14,17 @@ You should have received a copy of the GNU General Public License along with Leela Zero. If not, see . + + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the + NVIDIA CUDA Toolkit and/or the NVIDIA CUDA Deep Neural + Network library and/or the NVIDIA TensorRT inference library + (or a modified version of those libraries), containing parts covered + by the terms of the respective license agreement, the licensors of + this Program grant you additional permission to convey the resulting + work. */ #ifndef UCTNODE_H_INCLUDED diff --git a/src/UCTNodePointer.cpp b/src/UCTNodePointer.cpp index dea6e3dad..6c2f80364 100644 --- a/src/UCTNodePointer.cpp +++ b/src/UCTNodePointer.cpp @@ -1,6 +1,6 @@ /* This file is part of Leela Zero. - Copyright (C) 2018 Gian-Carlo Pascutto and contributors + Copyright (C) 2018-2019 Gian-Carlo Pascutto and contributors Leela Zero is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -14,6 +14,17 @@ You should have received a copy of the GNU General Public License along with Leela Zero. If not, see . 
+ + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the + NVIDIA CUDA Toolkit and/or the NVIDIA CUDA Deep Neural + Network library and/or the NVIDIA TensorRT inference library + (or a modified version of those libraries), containing parts covered + by the terms of the respective license agreement, the licensors of + this Program grant you additional permission to convey the resulting + work. */ #include "config.h" diff --git a/src/UCTNodePointer.h b/src/UCTNodePointer.h index 5f5f4b2c8..156b6f0aa 100644 --- a/src/UCTNodePointer.h +++ b/src/UCTNodePointer.h @@ -1,6 +1,6 @@ /* This file is part of Leela Zero. - Copyright (C) 2018 Gian-Carlo Pascutto + Copyright (C) 2018-2019 Gian-Carlo Pascutto Leela Zero is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -14,6 +14,17 @@ You should have received a copy of the GNU General Public License along with Leela Zero. If not, see . + + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the + NVIDIA CUDA Toolkit and/or the NVIDIA CUDA Deep Neural + Network library and/or the NVIDIA TensorRT inference library + (or a modified version of those libraries), containing parts covered + by the terms of the respective license agreement, the licensors of + this Program grant you additional permission to convey the resulting + work. */ #ifndef UCTNODEPOINTER_H_INCLUDED diff --git a/src/UCTNodeRoot.cpp b/src/UCTNodeRoot.cpp index b01a260ba..883ac57ae 100644 --- a/src/UCTNodeRoot.cpp +++ b/src/UCTNodeRoot.cpp @@ -1,6 +1,6 @@ /* This file is part of Leela Zero. 
- Copyright (C) 2018 Gian-Carlo Pascutto + Copyright (C) 2018-2019 Gian-Carlo Pascutto Leela Zero is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -14,6 +14,17 @@ You should have received a copy of the GNU General Public License along with Leela Zero. If not, see . + + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the + NVIDIA CUDA Toolkit and/or the NVIDIA CUDA Deep Neural + Network library and/or the NVIDIA TensorRT inference library + (or a modified version of those libraries), containing parts covered + by the terms of the respective license agreement, the licensors of + this Program grant you additional permission to convey the resulting + work. */ #include "config.h" diff --git a/src/UCTSearch.cpp b/src/UCTSearch.cpp index 1a4f61e1f..bc6d01907 100644 --- a/src/UCTSearch.cpp +++ b/src/UCTSearch.cpp @@ -1,6 +1,6 @@ /* This file is part of Leela Zero. - Copyright (C) 2017-2018 Gian-Carlo Pascutto and contributors + Copyright (C) 2017-2019 Gian-Carlo Pascutto and contributors Leela Zero is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -14,6 +14,17 @@ You should have received a copy of the GNU General Public License along with Leela Zero. If not, see . + + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the + NVIDIA CUDA Toolkit and/or the NVIDIA CUDA Deep Neural + Network library and/or the NVIDIA TensorRT inference library + (or a modified version of those libraries), containing parts covered + by the terms of the respective license agreement, the licensors of + this Program grant you additional permission to convey the resulting + work. 
*/ #include "config.h" diff --git a/src/UCTSearch.h b/src/UCTSearch.h index e34edac3d..739f74ac7 100644 --- a/src/UCTSearch.h +++ b/src/UCTSearch.h @@ -1,6 +1,6 @@ /* This file is part of Leela Zero. - Copyright (C) 2017-2018 Gian-Carlo Pascutto + Copyright (C) 2017-2019 Gian-Carlo Pascutto Leela Zero is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -14,6 +14,17 @@ You should have received a copy of the GNU General Public License along with Leela Zero. If not, see . + + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the + NVIDIA CUDA Toolkit and/or the NVIDIA CUDA Deep Neural + Network library and/or the NVIDIA TensorRT inference library + (or a modified version of those libraries), containing parts covered + by the terms of the respective license agreement, the licensors of + this Program grant you additional permission to convey the resulting + work. */ #ifndef UCTSEARCH_H_INCLUDED diff --git a/src/Utils.cpp b/src/Utils.cpp index d71e66f4f..2f529e6af 100644 --- a/src/Utils.cpp +++ b/src/Utils.cpp @@ -1,6 +1,6 @@ /* This file is part of Leela Zero. - Copyright (C) 2017-2018 Gian-Carlo Pascutto and contributors + Copyright (C) 2017-2019 Gian-Carlo Pascutto and contributors Leela Zero is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -14,6 +14,17 @@ You should have received a copy of the GNU General Public License along with Leela Zero. If not, see . 
+ + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the + NVIDIA CUDA Toolkit and/or the NVIDIA CUDA Deep Neural + Network library and/or the NVIDIA TensorRT inference library + (or a modified version of those libraries), containing parts covered + by the terms of the respective license agreement, the licensors of + this Program grant you additional permission to convey the resulting + work. */ #include "config.h" diff --git a/src/Utils.h b/src/Utils.h index 0ad207768..9dfe590e4 100644 --- a/src/Utils.h +++ b/src/Utils.h @@ -1,6 +1,6 @@ /* This file is part of Leela Zero. - Copyright (C) 2017-2018 Gian-Carlo Pascutto and contributors + Copyright (C) 2017-2019 Gian-Carlo Pascutto and contributors Leela Zero is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -14,6 +14,17 @@ You should have received a copy of the GNU General Public License along with Leela Zero. If not, see . + + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the + NVIDIA CUDA Toolkit and/or the NVIDIA CUDA Deep Neural + Network library and/or the NVIDIA TensorRT inference library + (or a modified version of those libraries), containing parts covered + by the terms of the respective license agreement, the licensors of + this Program grant you additional permission to convey the resulting + work. */ #ifndef UTILS_H_INCLUDED diff --git a/src/Zobrist.cpp b/src/Zobrist.cpp index 3e98fe922..dc371b9cf 100644 --- a/src/Zobrist.cpp +++ b/src/Zobrist.cpp @@ -1,6 +1,6 @@ /* This file is part of Leela Zero. 
- Copyright (C) 2017-2018 Gian-Carlo Pascutto and contributors + Copyright (C) 2017-2019 Gian-Carlo Pascutto and contributors Leela Zero is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -14,6 +14,17 @@ You should have received a copy of the GNU General Public License along with Leela Zero. If not, see . + + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the + NVIDIA CUDA Toolkit and/or the NVIDIA CUDA Deep Neural + Network library and/or the NVIDIA TensorRT inference library + (or a modified version of those libraries), containing parts covered + by the terms of the respective license agreement, the licensors of + this Program grant you additional permission to convey the resulting + work. */ #include "config.h" diff --git a/src/Zobrist.h b/src/Zobrist.h index e888e69a2..36f97faf4 100644 --- a/src/Zobrist.h +++ b/src/Zobrist.h @@ -1,6 +1,6 @@ /* This file is part of Leela Zero. - Copyright (C) 2017-2018 Gian-Carlo Pascutto and contributors + Copyright (C) 2017-2019 Gian-Carlo Pascutto and contributors Leela Zero is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -14,6 +14,17 @@ You should have received a copy of the GNU General Public License along with Leela Zero. If not, see . 
+ + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the + NVIDIA CUDA Toolkit and/or the NVIDIA CUDA Deep Neural + Network library and/or the NVIDIA TensorRT inference library + (or a modified version of those libraries), containing parts covered + by the terms of the respective license agreement, the licensors of + this Program grant you additional permission to convey the resulting + work. */ #ifndef ZOBRIST_H_INCLUDED #define ZOBRIST_H_INCLUDED diff --git a/src/config.h b/src/config.h index f9df82346..34af48d89 100644 --- a/src/config.h +++ b/src/config.h @@ -1,6 +1,6 @@ /* This file is part of Leela Zero. - Copyright (C) 2017-2018 Gian-Carlo Pascutto and contributors + Copyright (C) 2017-2019 Gian-Carlo Pascutto and contributors Leela Zero is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -14,6 +14,17 @@ You should have received a copy of the GNU General Public License along with Leela Zero. If not, see . + + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the + NVIDIA CUDA Toolkit and/or the NVIDIA CUDA Deep Neural + Network library and/or the NVIDIA TensorRT inference library + (or a modified version of those libraries), containing parts covered + by the terms of the respective license agreement, the licensors of + this Program grant you additional permission to convey the resulting + work. */ #ifndef CONFIG_H_INCLUDED diff --git a/src/kernels/convolve1.opencl b/src/kernels/convolve1.opencl index a0376ece8..7f25eb8ad 100644 --- a/src/kernels/convolve1.opencl +++ b/src/kernels/convolve1.opencl @@ -1,6 +1,6 @@ /* This file is part of Leela Zero. 
- Copyright (C) 2017-2018 Gian-Carlo Pascutto and contributors + Copyright (C) 2017-2019 Gian-Carlo Pascutto and contributors Leela Zero is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -14,6 +14,17 @@ You should have received a copy of the GNU General Public License along with Leela Zero. If not, see . + + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the + NVIDIA CUDA Toolkit and/or the NVIDIA CUDA Deep Neural + Network library and/or the NVIDIA TensorRT inference library + (or a modified version of those libraries), containing parts covered + by the terms of the respective license agreement, the licensors of + this Program grant you additional permission to convey the resulting + work. */ // Enables loading of this file using the C++ pre-processor's #include (C++11 standard raw string diff --git a/src/kernels/convolve3.opencl b/src/kernels/convolve3.opencl index 58588f278..c422f55a5 100644 --- a/src/kernels/convolve3.opencl +++ b/src/kernels/convolve3.opencl @@ -1,6 +1,6 @@ /* This file is part of Leela Zero. - Copyright (C) 2017-2018 Gian-Carlo Pascutto and contributors + Copyright (C) 2017-2019 Gian-Carlo Pascutto and contributors Leela Zero is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -14,6 +14,17 @@ You should have received a copy of the GNU General Public License along with Leela Zero. If not, see . 
+ + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the + NVIDIA CUDA Toolkit and/or the NVIDIA CUDA Deep Neural + Network library and/or the NVIDIA TensorRT inference library + (or a modified version of those libraries), containing parts covered + by the terms of the respective license agreement, the licensors of + this Program grant you additional permission to convey the resulting + work. */ // Enables loading of this file using the C++ pre-processor's #include (C++11 standard raw string diff --git a/src/tests/gtests.cpp b/src/tests/gtests.cpp index db3d440ca..bcd0d9558 100644 --- a/src/tests/gtests.cpp +++ b/src/tests/gtests.cpp @@ -1,6 +1,6 @@ /* This file is part of Leela Zero. - Copyright (C) 2018 Gian-Carlo Pascutto and contributors + Copyright (C) 2018-2019 Gian-Carlo Pascutto and contributors Leela Zero is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -14,6 +14,17 @@ You should have received a copy of the GNU General Public License along with Leela Zero. If not, see . + + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the + NVIDIA CUDA Toolkit and/or the NVIDIA CUDA Deep Neural + Network library and/or the NVIDIA TensorRT inference library + (or a modified version of those libraries), containing parts covered + by the terms of the respective license agreement, the licensors of + this Program grant you additional permission to convey the resulting + work. */ #include diff --git a/src/tests/utils_unittest.cpp b/src/tests/utils_unittest.cpp index f3f898847..9c645e93c 100644 --- a/src/tests/utils_unittest.cpp +++ b/src/tests/utils_unittest.cpp @@ -1,6 +1,6 @@ /* This file is part of Leela Zero. 
- Copyright (C) 2018 Seth Troisi and contributors + Copyright (C) 2018-2019 Seth Troisi and contributors Leela Zero is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -14,6 +14,17 @@ You should have received a copy of the GNU General Public License along with Leela Zero. If not, see . + + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the + NVIDIA CUDA Toolkit and/or the NVIDIA CUDA Deep Neural + Network library and/or the NVIDIA TensorRT inference library + (or a modified version of those libraries), containing parts covered + by the terms of the respective license agreement, the licensors of + this Program grant you additional permission to convey the resulting + work. */ #include From ce41cc115b8dc9c86a89acb335a6561f0d66dfeb Mon Sep 17 00:00:00 2001 From: Jonathan Roy Date: Tue, 15 Jan 2019 05:34:20 -0500 Subject: [PATCH 11/45] Add link to GoReviewPartner. Pull request #2147. --- README.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 4d5d6a58e..bb770f940 100644 --- a/README.md +++ b/README.md @@ -179,6 +179,10 @@ capability. show variations and winning statistics in the game tree, as well as a heatmap on the game board. +[GoReviewPartner](https://github.com/pnprog/goreviewpartner) is a tool for +automated review and analysis of games using bots (saved as .rsgf files), +Leela Zero is supported. + A lot of go software can interface to an engine via GTP, so look around. @@ -360,4 +364,4 @@ Network library and/or the NVIDIA TensorRT inference library (or a modified version of those libraries), containing parts covered by the terms of the respective license agreement, the licensors of this Program grant you additional permission to convey the resulting -work. \ No newline at end of file +work. 
From 4ca0734fa5ac5d4d1e34017ece484d70750b4bde Mon Sep 17 00:00:00 2001 From: Gian-Carlo Pascutto Date: Tue, 15 Jan 2019 11:39:12 +0100 Subject: [PATCH 12/45] Reminder to install OpenCL driver if separate. Although the OpenCL driver is generally installed as part of the driver install, mention the requirement explicitly in case it wasn't. See pull request #2138. --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index bb770f940..e1151e105 100644 --- a/README.md +++ b/README.md @@ -87,7 +87,9 @@ the compilation instructions below and then read the [Usage](#usage-for-playing- https://github.com/KhronosGroup/OpenCL-Headers/tree/master/CL) * OpenCL ICD loader (ocl-icd-libopencl1 on Debian/Ubuntu, or reference implementation at https://github.com/KhronosGroup/OpenCL-ICD-Loader) * An OpenCL capable device, preferably a very, very fast GPU, with recent -drivers is strongly recommended (OpenCL 1.1 support is enough). +drivers is strongly recommended (OpenCL 1.1 support is enough). Don't +forget to install the OpenCL driver if this part is packaged seperately +by the Linux distribution (e.g. nvidia-opencl-icd). If you do not have a GPU, add the define "USE_CPU_ONLY", for example by adding -DUSE_CPU_ONLY=1 to the cmake command line. * Optional: BLAS Library: OpenBLAS (libopenblas-dev) or Intel MKL From d4c038071fcf2fbaaa32f2c012f037136bd70ac3 Mon Sep 17 00:00:00 2001 From: Alexander Taylor Date: Tue, 15 Jan 2019 10:41:50 +0000 Subject: [PATCH 13/45] Fixed leelaz_file on Android. Pull request #2135.
--- src/Utils.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Utils.cpp b/src/Utils.cpp index 2f529e6af..1d6cff720 100644 --- a/src/Utils.cpp +++ b/src/Utils.cpp @@ -188,7 +188,7 @@ size_t Utils::ceilMultiple(size_t a, size_t b) { } const std::string Utils::leelaz_file(std::string file) { -#ifdef _WIN32 +#if defined(_WIN32) || defined(__ANDROID__) boost::filesystem::path dir(boost::filesystem::current_path()); #else // https://stackoverflow.com/a/26696759 From f944b97937fa8a098b9a0785c0dd773da91e7387 Mon Sep 17 00:00:00 2001 From: Arseny Krasutsky Date: Tue, 15 Jan 2019 13:42:38 +0300 Subject: [PATCH 14/45] Fix 'catching polymorphic type by value' warning. Pull request #2134. --- autogtp/Management.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/autogtp/Management.cpp b/autogtp/Management.cpp index 6cd6ae91c..2ea5a7b8b 100644 --- a/autogtp/Management.cpp +++ b/autogtp/Management.cpp @@ -483,7 +483,7 @@ Order Management::getWork(bool tuning) { for (auto retries = 0; retries < MAX_RETRIES; retries++) { try { return getWorkInternal(tuning); - } catch (NetworkException ex) { + } catch (const NetworkException &ex) { QTextStream(stdout) << "Network connection to server failed." << endl; QTextStream(stdout) @@ -704,7 +704,7 @@ void Management::sendAllGames() { QThread::sleep(10); } } - } catch (NetworkException ex) { + } catch (const NetworkException &ex) { QTextStream(stdout) << "Network connection to server failed." << endl; QTextStream(stdout) @@ -781,7 +781,7 @@ void Management::uploadResult(const QMap &r, const QMap &r, const QMap Date: Tue, 15 Jan 2019 03:04:23 -0800 Subject: [PATCH 15/45] Fixed converter script for minigo removing bias. Fixes #2020. Pull request #2133. 
--- training/minigo/convert_minigo.py | 231 ++++++++++++++++++++++-------- 1 file changed, 175 insertions(+), 56 deletions(-) diff --git a/training/minigo/convert_minigo.py b/training/minigo/convert_minigo.py index d4474e081..b23f05e37 100755 --- a/training/minigo/convert_minigo.py +++ b/training/minigo/convert_minigo.py @@ -1,56 +1,151 @@ #!/usr/bin/env python3 -import tensorflow as tf -import numpy as np +import re +import os import sys -if len(sys.argv) < 2: - print('Model filename without extension needed as an argument.') - exit() +import numpy as np +import tensorflow as tf -sess = tf.Session() -saver = tf.train.import_meta_graph(sys.argv[1]+'.meta') -saver.restore(sess, sys.argv[1]) +# Hide boring TF log statements +os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1' # or any {'0', '1', '2'} -if 0: - # Exports graph to tensorboard - with tf.Session() as sess: - writer = tf.summary.FileWriter('logs', sess.graph) - writer.close() - -trainable_names = [] -for v in tf.trainable_variables(): - trainable_names.append(v.name) - -weights = [] -for v in tf.global_variables(): - if v.name in trainable_names: - weights.append(v) - elif 'batch_normalization' in v.name: - # Moving mean and variance are not trainable, but are needed for the model - if 'moving_mean' in v.name or 'moving_variance' in v.name: + +def matches(name, parts): + return all(part in name for part in parts) + +def deduped(names): + names = [re.sub('_\d+', '', name) for name in names] + return sorted([(n, names.count(n)) for n in set(names)]) + +def getMinigoWeightsV1(model): + """Load and massage Minigo weights to Leela format. 
+ + This version works on older models (v9 or before) + But was broken when conv bias was removed in v10 + See: https://github.com/tensorflow/minigo/pull/292 and + https://github.com/gcp/leela-zero/issues/2020 + """ + sess = tf.Session() + saver = tf.train.import_meta_graph(model+'.meta') + saver.restore(sess, model) + + trainable_names = [] + for v in tf.trainable_variables(): + trainable_names.append(v.name) + + weights = [] + for v in tf.global_variables(): + if v.name in trainable_names: weights.append(v) + elif 'batch_normalization' in v.name: + # Moving mean and variance are not trainable, but are needed for the model + if 'moving_mean' in v.name or 'moving_variance' in v.name: + weights.append(v) -if 0: + # To match the format of V2 + weights_v2_format = [] for w in weights: - print(w.name) + nparray = w.eval(session=sess) + weights_v2_format.append((w.name, nparray)) + return weights_v2_format + +def getMinigoWeightsV2(model): + """Load and massage Minigo weights to Leela format. + + This version works on older models (v9 or before) + But was broken when conv bias was removed in v10 + See: https://github.com/tensorflow/minigo/pull/292 and + https://github.com/gcp/leela-zero/issues/2020 + """ + var_names = tf.train.load_checkpoint(model).get_variable_to_dtype_map() + + # count() overcounts by 3 from policy/value head and each layer has two convolutions. 
+ layers = (max([count for n, count in deduped(var_names)]) - 3) // 2 + print (layers, 'layers') + + has_conv_bias = any(matches(name, ('conv2d', 'bias')) for name in var_names.keys()) + if not has_conv_bias: + print('Did not find conv bias in this model, using all zeros') + empty_conv_bias = tf.constant([], name='placeholder_for_conv_bias') + + # 2 * layer copies of + # 6*n + 0: conv2d/kernel:0 + # 6*n + 1: conv2d/bias:0 + # 6*n + 2: batch_normalization/gamma:0 + # 6*n + 3: batch_normalization/beta:0 + # 6*n + 4: batch_normalization/moving_mean:0 + # 6*n + 5: batch_normalization/moving_variance:0 + # at the end 2x + # conv2d_39/kernel:0 + # conv2d_39/bias:0 + # batch_normalization_39/moving_mean:0 + # batch_normalization_39/moving_variance:0 + # dense/kernel:0 + # dense/bias:0 + # final value dense + # dense_2/kernel:0 + # dense_2/bias:0 + + weight_names = [] + + def tensor_number(number): + return '' if number ==0 else '_' + str(number) + + def add_conv(number, with_gamma=True): + number = tensor_number(number) + weight_names.append('conv2d{}/kernel:0'.format(number)) + weight_names.append('conv2d{}/bias:0'.format(number)) + if with_gamma: + weight_names.append('batch_normalization{}/gamma:0'.format(number)) + weight_names.append('batch_normalization{}/beta:0'.format(number)) + weight_names.append('batch_normalization{}/moving_mean:0'.format(number)) + weight_names.append('batch_normalization{}/moving_variance:0'.format(number)) + + def add_dense(number): + number = tensor_number(number) + weight_names.append('dense{}/kernel:0'.format(number)) + weight_names.append('dense{}/bias:0'.format(number)) + + # This blindly builds the correct names for the tensors. + for l in range(2 * layers + 1): + add_conv(l) + + add_conv(2 * layers + 1, with_gamma=False) + add_dense(0) + add_conv(2 * layers + 2, with_gamma=False) + add_dense(1) + add_dense(2) + + # This tries to load the data for each tensors. 
+ weights = [] + for i, name in enumerate(weight_names): + if matches(name, ('conv2d', 'bias')) and not has_conv_bias: + w = np.zeros(weights[-1][1].shape[-1:]) + else: + w = tf.train.load_variable(model, name) + +# print ("{:45} {} {}".format(name, type(w), w.shape)) + weights.append((name, w)) + return weights def merge_gammas(weights): out_weights = [] skip = 0 - for e, w in enumerate(weights): + for e, (name, w) in enumerate(weights): if skip > 0: skip -= 1 continue - if 'kernel' in w.name and 'conv2d' in w.name and 'gamma' in weights[e+2].name: + + if matches(name, ('conv2d', 'kernel')) and 'gamma' in weights[e+2][0]: kernel = w - bias = weights[e+1] - gamma = weights[e+2] - beta = weights[e+3] - mean = weights[e+4] - var = weights[e+5] - - new_kernel = kernel * tf.reshape(gamma, (1, 1, 1, -1)) - new_bias = gamma * bias + beta * tf.sqrt(var + tf.constant(1e-5)) + bias = weights[e+1][1] + gamma = weights[e+2][1] + beta = weights[e+3][1] + mean = weights[e+4][1] + var = weights[e+5][1] + + new_kernel = kernel * np.reshape(gamma, (1, 1, 1, -1)) + new_bias = gamma * bias + beta * np.sqrt(var + 1e-5) new_mean = mean * gamma out_weights.append(new_kernel) @@ -59,14 +154,15 @@ def merge_gammas(weights): out_weights.append(var) skip = 5 - elif 'dense' in w.name and 'kernel' in w.name: + + elif matches(name, ('dense', 'kernel')): # Minigo uses channels last order while LZ uses channels first, # Do some surgery for the dense layers to make the output match. 
- planes = w.shape[0].value//361 + planes = w.shape[0] // 361 if planes > 0: - w1 = tf.reshape(w, [19, 19, planes, -1]) - w2 = tf.transpose(w1, [2, 0, 1, 3]) - new_kernel = tf.reshape(w2, [361*planes, -1]) + w1 = np.reshape(w, [19, 19, planes, -1]) + w2 = np.transpose(w1, [2, 0, 1, 3]) + new_kernel = np.reshape(w2, [361*planes, -1]) out_weights.append(new_kernel) else: out_weights.append(w) @@ -76,15 +172,15 @@ def merge_gammas(weights): return out_weights def save_leelaz_weights(filename, weights): - with open(filename, "w") as file: + with open(filename, 'w') as file: # Version tag # Minigo outputs winrate from blacks point of view (same as ELF) - file.write("2") + file.write('2') for e, w in enumerate(weights): # Newline unless last line (single bias) - file.write("\n") + file.write('\n') work_weights = None - if w.shape.ndims == 4: + if len(w.shape) == 4: # Convolution weights need a transpose # # TF (kYXInputOutput) @@ -92,28 +188,51 @@ def save_leelaz_weights(filename, weights): # # Leela/cuDNN/Caffe (kOutputInputYX) # [output, input, filter_size, filter_size] - work_weights = tf.transpose(w, [3, 2, 0, 1]) - elif w.shape.ndims == 2: + work_weights = np.transpose(w, [3, 2, 0, 1]) + elif len(w.shape) == 2: # Fully connected layers are [in, out] in TF # # [out, in] in Leela # - work_weights = tf.transpose(w, [1, 0]) + work_weights = np.transpose(w, [1, 0]) else: # Biases, batchnorm etc work_weights = w - nparray = work_weights.eval(session=sess) if e == 0: # Fix input planes - + # # Add zero weights for white to play input plane - nparray = np.pad(nparray, ((0, 0), (0, 1), (0, 0), (0, 0)), 'constant', constant_values=0) + work_weights = np.pad(work_weights, ((0, 0), (0, 1), (0, 0), (0, 0)), 'constant', constant_values=0) # Permutate weights p = [0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15, 16, 17] - nparray = nparray[:, p, :, :] - wt_str = [str(wt) for wt in np.ravel(nparray)] - file.write(" ".join(wt_str)) + work_weights = work_weights[:, p, :, :] + 
wt_str = ["{:0.8g}".format(wt) for wt in np.ravel(work_weights)] + file.write(' '.join(wt_str)) + + +if len(sys.argv) < 2: + print('Model filename without extension needed as an argument.') + exit() + +model = sys.argv[1] + +print ('loading ', model) +print () + +# Can be used for v9 or before models. +# weights = getMinigoWeightsV1(model) +weights = getMinigoWeightsV2(model) + +if 0: + for name, variables in [ + ('load_checkpoint', var_names.keys()), + # ('trainable_names', trainable_names), + # ('global_variable', [v.name for v in tf.global_variables()]) + ]: + print (name, len(variables)) + print (deduped(variables)) + print () -save_leelaz_weights(sys.argv[1]+'_converted.txt', merge_gammas(weights)) +save_leelaz_weights(model + '_converted.txt', merge_gammas(weights)) From 44d0e6a72c53c8c9cd1536f7b482e23683c12fa3 Mon Sep 17 00:00:00 2001 From: Gian-Carlo Pascutto Date: Tue, 15 Jan 2019 13:18:54 +0100 Subject: [PATCH 16/45] Add zlib to the mac OS X build instructions. See pull request #2122. --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e1151e105..e75eca350 100644 --- a/README.md +++ b/README.md @@ -126,7 +126,7 @@ by adding -DUSE_CPU_ONLY=1 to the cmake command line. git submodule update --init --recursive # Install build depedencies - brew install boost cmake + brew install boost cmake zlib # Use a stand alone build directory to keep source dir clean mkdir build && cd build From d192fc63fddcdefac7210e1b07ec3c4b7b1afa3b Mon Sep 17 00:00:00 2001 From: Junhee Yoo <33939814+ihavnoid@users.noreply.github.com> Date: Tue, 15 Jan 2019 21:21:45 +0900 Subject: [PATCH 17/45] UCTNodePtr rare race condition fix. Calling get_eval() on zero-visit node will assert-fail. The original code could assert-fail on b.get_eval() if 'a' and 'b' both had zero visits but suddenly 'a' gained an additional visit. Pull request #2110. 
--- src/UCTNode.cpp | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/UCTNode.cpp b/src/UCTNode.cpp index fb7729d2e..392c493e8 100644 --- a/src/UCTNode.cpp +++ b/src/UCTNode.cpp @@ -305,15 +305,21 @@ class NodeComp : public std::binary_function { public: NodeComp(int color) : m_color(color) {}; + + // WARNING : on very unusual cases this can be called on multithread + // contexts (e.g., UCTSearch::get_pv()) so beware of race conditions bool operator()(const UCTNodePointer& a, const UCTNodePointer& b) { + auto a_visit = a.get_visits(); + auto b_visit = b.get_visits(); + // if visits are not same, sort on visits - if (a.get_visits() != b.get_visits()) { - return a.get_visits() < b.get_visits(); + if (a_visit != b_visit) { + return a_visit < b_visit; } // neither has visits, sort on policy prior - if (a.get_visits() == 0) { + if (a_visit == 0) { return a.get_policy() < b.get_policy(); } From bd0d66734e353f244271a9a2b855ae4f41024c18 Mon Sep 17 00:00:00 2001 From: dbosst Date: Tue, 15 Jan 2019 07:29:20 -0500 Subject: [PATCH 18/45] Make sure analysis is printed at least once. Fixes issue #2001. Pull request #2114. 
--- src/UCTSearch.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/UCTSearch.cpp b/src/UCTSearch.cpp index bc6d01907..bd396bb25 100644 --- a/src/UCTSearch.cpp +++ b/src/UCTSearch.cpp @@ -755,6 +755,10 @@ int UCTSearch::think(int color, passflag_t passflag) { keeprunning &= have_alternate_moves(elapsed_centis, time_for_move); } while (keeprunning); + if (last_output == 0) { + output_analysis(m_rootstate, *m_root); + } + // stop the search m_run = false; tg.wait_all(); @@ -820,6 +824,10 @@ void UCTSearch::ponder() { keeprunning &= !stop_thinking(0, 1); } while (!Utils::input_pending() && keeprunning); + if (last_output == 0) { + output_analysis(m_rootstate, *m_root); + } + // stop the search m_run = false; tg.wait_all(); From 1960e939d76362f0e90f948f40fea066f325646f Mon Sep 17 00:00:00 2001 From: Gian-Carlo Pascutto Date: Tue, 15 Jan 2019 13:31:45 +0100 Subject: [PATCH 19/45] Don't post if not requested. Follow up fix for pull request #2114. --- src/UCTSearch.cpp | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/UCTSearch.cpp b/src/UCTSearch.cpp index bd396bb25..2f0157ca8 100644 --- a/src/UCTSearch.cpp +++ b/src/UCTSearch.cpp @@ -755,15 +755,16 @@ int UCTSearch::think(int color, passflag_t passflag) { keeprunning &= have_alternate_moves(elapsed_centis, time_for_move); } while (keeprunning); - if (last_output == 0) { + // Make sure to post at least once. + if (cfg_analyze_interval_centis && last_output == 0) { output_analysis(m_rootstate, *m_root); } - // stop the search + // Stop the search. m_run = false; tg.wait_all(); - // reactivate all pruned root children + // Reactivate all pruned root children. for (const auto& node : m_root->get_children()) { node->set_active(true); } @@ -773,7 +774,7 @@ int UCTSearch::think(int color, passflag_t passflag) { return FastBoard::PASS; } - // display search info + // Display search info. 
myprintf("\n"); dump_stats(m_rootstate, *m_root); Training::record(m_network, m_rootstate, *m_root); @@ -824,15 +825,16 @@ void UCTSearch::ponder() { keeprunning &= !stop_thinking(0, 1); } while (!Utils::input_pending() && keeprunning); - if (last_output == 0) { + // Make sure to post at least once. + if (cfg_analyze_interval_centis && last_output == 0) { output_analysis(m_rootstate, *m_root); } - // stop the search + // Stop the search. m_run = false; tg.wait_all(); - // display search info + // Display search info. myprintf("\n"); dump_stats(m_rootstate, *m_root); From fc83ec7eb7122ef152b7eec07593dd64da476ddf Mon Sep 17 00:00:00 2001 From: Hersmunch Date: Tue, 15 Jan 2019 13:31:55 +0000 Subject: [PATCH 20/45] AutoGTP: Allow specifying initial GTP commands. * AutoGTP: Allow specifying initial GTP commands. Also add support for white taking the first move in handicapped job games. * AutoGTP: Refactored core loop for match games to avoid code duplication. * Fixed white using black's match game settings after loading from an SGF by moving SGF loading into Game::gameStart() to before sending GTP commands (except handicap commands). * Changed so that when an SGF file is loaded, AutoGTP determines whether handicap is in use from the SGF rather than from any starting GTP commands. Pull request #2096. 
--- autogtp/Game.cpp | 51 +++++++++++++++++++++++++++--- autogtp/Game.h | 6 +++- autogtp/Job.cpp | 72 ++++++++++++++++++++---------------------- autogtp/Management.cpp | 25 +++++++++++++-- autogtp/Management.h | 1 + autogtp/Order.cpp | 6 ++-- 6 files changed, 113 insertions(+), 48 deletions(-) diff --git a/autogtp/Game.cpp b/autogtp/Game.cpp index 228fc1d70..f87410266 100644 --- a/autogtp/Game.cpp +++ b/autogtp/Game.cpp @@ -26,6 +26,7 @@ Game::Game(const Engine& engine) : QProcess(), m_engine(engine), + m_isHandicap(false), m_resignation(false), m_blackToMove(true), m_blackResigned(false), @@ -160,7 +161,9 @@ void Game::checkVersion(const VersionTuple &min_version) { } } -bool Game::gameStart(const VersionTuple &min_version) { +bool Game::gameStart(const VersionTuple &min_version, + const QString &sgf, + const int moves) { start(m_engine.getCmdLine()); if (!waitForStarted()) { error(Game::NO_LEELAZ); @@ -170,7 +173,44 @@ bool Game::gameStart(const VersionTuple &min_version) { // check any return values. checkVersion(min_version); QTextStream(stdout) << "Engine has started." << endl; - for (auto command : m_engine.m_commands) { + //If there is an sgf file to start playing from then it will contain + //whether there is handicap in use. If there is no sgf file then instead, + //check whether there are any handicap commands to send (these fail + //if the board is not empty). + //Then send the rest of the GTP commands after any SGF has been loaded so + //that they can override any settings loaded from the SGF. 
+ if (!sgf.isEmpty()) { + QFile sgfFile(sgf + ".sgf"); + if (!sgfFile.exists()) { + QTextStream(stdout) << "Cannot find sgf file " << sgf << endl; + exit(EXIT_FAILURE); + } + sgfFile.open(QIODevice::Text | QIODevice::ReadOnly); + const auto sgfData = QTextStream(&sgfFile).readAll(); + const auto re = QRegularExpression("HA\\[\\d+\\]"); + const auto match = re.match(sgfData); + m_isHandicap = match.hasMatch(); + sgfFile.close(); + if (moves == 0) { + loadSgf(sgf); + } else { + loadSgf(sgf, moves); + } + setMovesCount(moves); + } else { + for (auto command : m_engine.m_commands.filter("handicap")) { + QTextStream(stdout) << command << endl; + if (!sendGtpCommand(command)) + { + QTextStream(stdout) << "GTP failed on: " << command << endl; + exit(EXIT_FAILURE); + } + m_isHandicap = true; + m_blackToMove = false; + } + } + const auto re = QRegularExpression("^((?!handicap).)*$"); + for (auto command : m_engine.m_commands.filter(re)) { QTextStream(stdout) << command << endl; if (!sendGtpCommand(command)) { @@ -178,7 +218,7 @@ bool Game::gameStart(const VersionTuple &min_version) { exit(EXIT_FAILURE); } } - QTextStream(stdout) << "Thinking time set." << endl; + QTextStream(stdout) << "Starting GTP commands sent." << endl; return true; } @@ -196,7 +236,10 @@ void Game::move() { void Game::setMovesCount(int moves) { m_moveNum = moves; - m_blackToMove = (moves % 2) == 0; + //The game always starts at move 0 (GTP states that handicap stones are not part + //of the move history), so if there is no handicap then black moves on even + //numbered turns but if there is handicap then black moves on odd numbered turns. + m_blackToMove = (moves % 2) == (m_isHandicap ? 
1 : 0); } bool Game::waitReady() { diff --git a/autogtp/Game.h b/autogtp/Game.h index 5173d4ff2..77f4d86ef 100644 --- a/autogtp/Game.h +++ b/autogtp/Game.h @@ -57,7 +57,9 @@ class Game : QProcess { public: Game(const Engine& engine); ~Game() = default; - bool gameStart(const VersionTuple& min_version); + bool gameStart(const VersionTuple& min_version, + const QString &sgf = QString(), + const int moves = 0); void move(); bool waitForMove() { return waitReady(); } bool readMove(); @@ -80,6 +82,7 @@ class Game : QProcess { QString getWinnerName() const { return m_winner; } int getMovesCount() const { return m_moveNum; } void setMovesCount(int moves); + int getToMove() const { return m_blackToMove ? BLACK : WHITE; } QString getResult() const { return m_result.trimmed(); } enum { BLACK = 0, @@ -98,6 +101,7 @@ class Game : QProcess { QString m_fileName; QString m_moveDone; QString m_result; + bool m_isHandicap; bool m_resignation; bool m_blackToMove; bool m_blackResigned; diff --git a/autogtp/Job.cpp b/autogtp/Job.cpp index cb65ebd57..dd35a13b8 100644 --- a/autogtp/Job.cpp +++ b/autogtp/Job.cpp @@ -68,21 +68,15 @@ WaitJob::WaitJob(QString gpu, Management *parent) : Result ProductionJob::execute(){ Result res(Result::Error); Game game(m_engine); - if (!game.gameStart(m_leelazMinVersion)) { + if (!game.gameStart(m_leelazMinVersion, m_sgf, m_moves)) { return res; } if (!m_sgf.isEmpty()) { - if (m_moves == 0) { - game.loadSgf(m_sgf); - } else { - game.loadSgf(m_sgf, m_moves); - } - game.setMovesCount(m_moves); + QFile::remove(m_sgf + ".sgf"); if (m_restore) { game.loadTraining(m_sgf); QFile::remove(m_sgf + ".train"); } - QFile::remove(m_sgf + ".sgf"); } do { game.move(); @@ -126,6 +120,9 @@ void ProductionJob::init(const Order &o) { Job::init(o); m_engine.m_network = "networks/" + o.parameters()["network"] + ".gz"; m_engine.m_options = " " + o.parameters()["options"] + m_gpu + " -g -q -w "; + if (o.parameters().contains("gtpCommands")) { + m_engine.m_commands = 
o.parameters()["gtpCommands"].split(","); + } m_debug = o.parameters()["debug"] == "true"; m_sgf = o.parameters()["sgf"]; m_moves = o.parameters()["moves"].toInt(); @@ -135,48 +132,41 @@ void ProductionJob::init(const Order &o) { Result ValidationJob::execute(){ Result res(Result::Error); Game first(m_engineFirst); - if (!first.gameStart(m_leelazMinVersion)) { + if (!first.gameStart(m_leelazMinVersion, m_sgf, m_moves)) { return res; } Game second(m_engineSecond); - if (!second.gameStart(m_leelazMinVersion)) { + if (!second.gameStart(m_leelazMinVersion, m_sgf, m_moves)) { return res; } if (!m_sgf.isEmpty()) { - if (m_moves == 0) { - first.loadSgf(m_sgf); - second.loadSgf(m_sgf); - } else { - first.loadSgf(m_sgf, m_moves); - second.loadSgf(m_sgf, m_moves); - } - first.setMovesCount(m_moves); - second.setMovesCount(m_moves); QFile::remove(m_sgf + ".sgf"); } - QString wmove = "play white "; - QString bmove = "play black "; + const QString stringWhite = "white"; + const QString stringBlack = "black"; + //Start with the side to move set to the opposite of the expected way around + //because the game playing loop swaps the sides at the start of each iteration. + //This avoids having to test which side is to move on every iteration of the loop. 
+ auto gameToMove = &second; + auto colorToMove = &stringWhite; + auto gameOpponent = &first; + auto colorOpponent = &stringBlack; + if (first.getToMove() == Game::WHITE) { + std::swap(gameToMove, gameOpponent); + std::swap(colorToMove, colorOpponent); + } do { - first.move(); - if (!first.waitForMove()) { + std::swap(gameToMove, gameOpponent); + std::swap(colorToMove, colorOpponent); + gameToMove->move(); + if (!gameToMove->waitForMove()) { return res; } - first.readMove(); - m_boss->incMoves(); - if (first.checkGameEnd()) { - break; - } - second.setMove(bmove + first.getMove()); - second.move(); - if (!second.waitForMove()) { - return res; - } - second.readMove(); - m_boss->incMoves(); - first.setMove(wmove + second.getMove()); - second.nextMove(); - } while (first.nextMove() && m_state.load() == RUNNING); + gameToMove->readMove(); + m_boss->incMoves(); + gameOpponent->setMove("play " + *colorToMove + " " + gameToMove->getMove()); + } while (gameToMove->nextMove() && m_state.load() == RUNNING); switch (m_state.load()) { case RUNNING: @@ -210,8 +200,14 @@ void ValidationJob::init(const Order &o) { Job::init(o); m_engineFirst.m_network = "networks/" + o.parameters()["firstNet"] + ".gz"; m_engineFirst.m_options = " " + o.parameters()["options"] + m_gpu + " -g -q -w "; + if (o.parameters().contains("gtpCommands")) { + m_engineFirst.m_commands = o.parameters()["gtpCommands"].split(","); + } m_engineSecond.m_network = "networks/" + o.parameters()["secondNet"] + ".gz"; m_engineSecond.m_options = " " + o.parameters()["optionsSecond"] + m_gpu + " -g -q -w "; + if (o.parameters().contains("gtpCommandsSecond")) { + m_engineSecond.m_commands = o.parameters()["gtpCommandsSecond"].split(","); + } m_sgf = o.parameters()["sgf"]; m_moves = o.parameters()["moves"].toInt(); } diff --git a/autogtp/Management.cpp b/autogtp/Management.cpp index 2ea5a7b8b..daa762fb4 100644 --- a/autogtp/Management.cpp +++ b/autogtp/Management.cpp @@ -22,11 +22,13 @@ #include #include #include +#include 
#include #include #include #include #include +#include #include "Management.h" #include "Game.h" @@ -274,6 +276,14 @@ QString Management::getOptionsString(const QJsonObject &opt, const QString &rnd) return options; } +QString Management::getGtpCommandsString(const QJsonValue >pCommands) { + const auto gtpCommandsJsonDoc = QJsonDocument(gtpCommands.toArray()); + const auto gtpCommandsJson = gtpCommandsJsonDoc.toJson(QJsonDocument::Compact); + auto gtpCommandsString = QVariant(gtpCommandsJson).toString(); + gtpCommandsString.remove(QRegularExpression("[\\[\\]\"]")); + return gtpCommandsString; +} + Order Management::getWorkInternal(bool tuning) { Order o(Order::Error); @@ -304,7 +314,9 @@ Order Management::getWorkInternal(bool tuning) { white_hash_gzip_hash: "23c29bf777e446b5c3fb0e6e7fa4d53f15b99cc0c25798b70b57877b55bf1638", black_hash_gzip_hash: "ccfe6023456aaaa423c29bf777e4aab481245289aaaabb70b7b5380992377aa8", hash_sgf_hash: "7dbccc5ad9eb38f0135ff7ec860f0e81157f47dfc0a8375cef6bf1119859e537", - moves_count: "92" + moves_count: "92", + gtp_commands : [ "time_settings 600 30 1", "komi 0.5", "fixed_handicap 2" ], + white_gtp_commands : [ "time_settings 0 10 1", "komi 0.5", "fixed_handicap 2" ], } { @@ -323,7 +335,8 @@ Order Management::getWorkInternal(bool tuning) { }, hash_gzip_hash: "23c29bf777e446b5c3fb0e6e7fa4d53f15b99cc0c25798b70b57877b55bf1638", hash_sgf_hash: "7dbccc5ad9eb38f0135ff7ec860f0e81157f47dfc0a8375cef6bf1119859e537", - moves_count: "92" + moves_count: "92", + gtp_commands : [ "time_settings 600 30 1", "komi 0.5", "fixed_handicap 4" ], } { @@ -408,6 +421,9 @@ Order Management::getWorkInternal(bool tuning) { parameters["optHash"] = ob.value("options_hash").toString(); parameters["options"] = getOptionsString(ob.value("options").toObject(), rndSeed); } + if (ob.contains("gtp_commands")) { + parameters["gtpCommands"] = getGtpCommandsString(ob.value("gtp_commands")); + } if (ob.contains("hash_sgf_hash")) { parameters["sgf"] = 
fetchGameData(ob.value("hash_sgf_hash").toString(), "sgf"); parameters["moves"] = ob.contains("moves_count") ? @@ -448,6 +464,11 @@ Order Management::getWorkInternal(bool tuning) { parameters["optionsSecond"] = ob.contains("white_options") ? getOptionsString(ob.value("white_options").toObject(), rndSeed) : parameters["options"]; + if (ob.contains("gtp_commands")) { + parameters["gtpCommandsSecond"] = ob.contains("white_gtp_commands") ? + getGtpCommandsString(ob.value("white_gtp_commands")) : + parameters["gtpCommands"]; + } o.type(Order::Validation); o.parameters(parameters); diff --git a/autogtp/Management.h b/autogtp/Management.h index a1d43f4e8..cfd7dc29f 100644 --- a/autogtp/Management.h +++ b/autogtp/Management.h @@ -90,6 +90,7 @@ public slots: QString getOption(const QJsonObject &ob, const QString &key, const QString &opt, const QString &defValue); QString getBoolOption(const QJsonObject &ob, const QString &key, const QString &opt, bool defValue); QString getOptionsString(const QJsonObject &opt, const QString &rnd); + QString getGtpCommandsString(const QJsonValue >pCommands); void sendAllGames(); void checkStoredGames(); QFileInfo getNextStored(); diff --git a/autogtp/Order.cpp b/autogtp/Order.cpp index 044cecf8a..f71b1e791 100644 --- a/autogtp/Order.cpp +++ b/autogtp/Order.cpp @@ -42,14 +42,14 @@ void Order::load(const QString &file) { return; } QTextStream in(&f); - in >> m_type; + in >> m_type; int count; in >> count; QString key; for (int i = 0; i < count; i++) { in >> key; - if (key == "options" || key == "optionsSecond") { - m_parameters[key] = in.readLine(); + if (key.contains("options") || key.contains("gtpCommands")) { + m_parameters[key] = in.readLine().remove(0, 1); } else { in >> m_parameters[key]; } From c7feb53dec7ca2c98cf2f0c7ca954906736540a9 Mon Sep 17 00:00:00 2001 From: Gian-Carlo Pascutto Date: Tue, 15 Jan 2019 19:36:34 +0100 Subject: [PATCH 21/45] Update Eigen to 3.3.7. 
This includes some optimization improvements for newer GCC/Clang that may be relevant to a lot of our users. Pull request #2151. --- src/Eigen | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Eigen b/src/Eigen index a1b9c26c5..cf794d3b7 160000 --- a/src/Eigen +++ b/src/Eigen @@ -1 +1 @@ -Subproject commit a1b9c26c5e62cb8c17836e601edd64b92aa8e5ae +Subproject commit cf794d3b741a6278df169e58461f8529f43bce5d From 085d71b1607df8e7c4d4fff72d7b660991b53b52 Mon Sep 17 00:00:00 2001 From: Gian-Carlo Pascutto Date: Tue, 22 Jan 2019 17:56:50 +0100 Subject: [PATCH 22/45] Fix lz-setoption name playouts. Fixes issue #2167. I could swear I fixed this before. Maybe I forgot to push? --- src/GTP.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/GTP.cpp b/src/GTP.cpp index 4b1af324f..ccc599e1e 100644 --- a/src/GTP.cpp +++ b/src/GTP.cpp @@ -1151,7 +1151,7 @@ void GTP::execute_setoption(UCTSearch & search, // Note that if the playouts are changed but no // explicit command to set memory usage is given, // we will stick with the initial guess we made on startup. - search.set_playout_limit(cfg_max_visits); + search.set_playout_limit(cfg_max_playouts); gtp_printf(id, ""); } else if (name == "lagbuffer") { From 9831c96ec6a77b800d951731bcd413365cd45e0f Mon Sep 17 00:00:00 2001 From: Hersmunch Date: Tue, 22 Jan 2019 17:12:01 +0000 Subject: [PATCH 23/45] AutoGTP: More info in SGF comments. * AutoGTP: Added full engine options and starting GTP commands to SGF comments that are produced. * Refactored Game::fixSgf(). Pull request #2160. 
--- autogtp/Game.cpp | 50 +++++++++++++++++++++++++++++++++++++++--------- autogtp/Game.h | 7 ++++++- autogtp/Job.cpp | 7 ++++--- 3 files changed, 51 insertions(+), 13 deletions(-) diff --git a/autogtp/Game.cpp b/autogtp/Game.cpp index f87410266..e049a46d6 100644 --- a/autogtp/Game.cpp +++ b/autogtp/Game.cpp @@ -392,13 +392,8 @@ bool Game::loadSgf(const QString &fileName, const int moves) { return sendGtpCommand(qPrintable("loadsgf " + fileName + ".sgf " + QString::number(moves + 1))); } -bool Game::fixSgf(const QString& weightFile, const bool resignation) { - QFile sgfFile(m_fileName + ".sgf"); - if (!sgfFile.open(QIODevice::Text | QIODevice::ReadOnly)) { - return false; - } - QString sgfData = sgfFile.readAll(); - QRegularExpression re("PW\\[Human\\]"); +void Game::fixSgfPlayer(QString& sgfData, const Engine& whiteEngine) { + QRegularExpression oldPlayer("PW\\[Human\\]"); QString playerName("PB[Leela Zero "); QRegularExpression le("PB\\[Leela Zero \\S+ "); QRegularExpressionMatch match = le.match(sgfData); @@ -406,10 +401,36 @@ bool Game::fixSgf(const QString& weightFile, const bool resignation) { playerName = match.captured(0); } playerName = "PW" + playerName.remove(0, 2); - playerName += weightFile.left(8); + playerName += whiteEngine.getNetworkFile().left(8); playerName += "]"; - sgfData.replace(re, playerName); + sgfData.replace(oldPlayer, playerName); +} + +void Game::fixSgfComment(QString& sgfData, const Engine& whiteEngine, + const bool isSelfPlay) { + QRegularExpression oldComment("(C\\[Leela Zero)( options:.*)\\]"); + QString comment("\\1"); + if (!isSelfPlay) { + comment += " Black"; + } + comment += "\\2 Starting GTP commands:"; + for (const auto command : m_engine.m_commands) { + comment += " " + command; + } + if (!isSelfPlay) { + comment += " White options:"; + comment += whiteEngine.m_options + " " + whiteEngine.m_network; + comment += " Starting GTP commands:"; + for (const auto command : whiteEngine.m_commands) { + comment += " " + command; + } 
+ } + comment += "]"; + comment.replace(QRegularExpression("\\s\\s+"), " "); + sgfData.replace(oldComment, comment); +} +void Game::fixSgfResult(QString& sgfData, const bool resignation) { if (resignation) { QRegularExpression oldResult("RE\\[B\\+.*\\]"); QString newResult("RE[B+Resign] "); @@ -422,7 +443,18 @@ bool Game::fixSgf(const QString& weightFile, const bool resignation) { QString noPass(")"); sgfData.replace(lastpass, noPass); } +} +bool Game::fixSgf(const Engine& whiteEngine, const bool resignation, + const bool isSelfPlay) { + QFile sgfFile(m_fileName + ".sgf"); + if (!sgfFile.open(QIODevice::Text | QIODevice::ReadOnly)) { + return false; + } + QString sgfData = sgfFile.readAll(); + fixSgfPlayer(sgfData, whiteEngine); + fixSgfComment(sgfData, whiteEngine, isSelfPlay); + fixSgfResult(sgfData, resignation); sgfFile.close(); if (sgfFile.open(QFile::WriteOnly | QFile::Truncate)) { QTextStream out(&sgfFile); diff --git a/autogtp/Game.h b/autogtp/Game.h index 77f4d86ef..3878f6137 100644 --- a/autogtp/Game.h +++ b/autogtp/Game.h @@ -70,7 +70,8 @@ class Game : QProcess { bool writeSgf(); bool loadTraining(const QString &fileName); bool saveTraining(); - bool fixSgf(const QString& weightFile, const bool resignation); + bool fixSgf(const Engine& whiteEngine, const bool resignation, + const bool isSelfPlay); bool dumpTraining(); bool dumpDebug(); void gameQuit(); @@ -112,6 +113,10 @@ class Game : QProcess { bool waitReady(); bool eatNewLine(); void error(int errnum); + void fixSgfPlayer(QString& sgfData, const Engine& whiteEngine); + void fixSgfComment(QString& sgfData, const Engine& whiteEngine, + const bool isSelfPlay); + void fixSgfResult(QString& sgfData, const bool resignation); }; #endif /* GAME_H */ diff --git a/autogtp/Job.cpp b/autogtp/Job.cpp index dd35a13b8..52b92bc0a 100644 --- a/autogtp/Job.cpp +++ b/autogtp/Job.cpp @@ -91,7 +91,7 @@ Result ProductionJob::execute(){ QTextStream(stdout) << "Game has ended." 
<< endl; if (game.getScore()) { game.writeSgf(); - game.fixSgf(m_engine.getNetworkFile(), false); + game.fixSgf(m_engine, false, true); game.dumpTraining(); if (m_debug) { game.dumpDebug(); @@ -175,8 +175,9 @@ Result ValidationJob::execute(){ res.add("score", first.getResult()); res.add("winner", first.getWinnerName()); first.writeSgf(); - first.fixSgf(m_engineSecond.getNetworkFile(), - (res.parameters()["score"] == "B+Resign")); + first.fixSgf(m_engineSecond, + (res.parameters()["score"] == "B+Resign"), + false); res.add("file", first.getFile()); } res.type(Result::Win); From 49635351e1a2a7ccdb71439467f79c853737c0cc Mon Sep 17 00:00:00 2001 From: Seth Troisi Date: Fri, 1 Feb 2019 02:21:23 -0800 Subject: [PATCH 24/45] Truncate and compress minigo weights. Truncate to 4 precision and compress converted minigo weights. Pull request #2173. --- training/minigo/convert_minigo.py | 54 +++++++++++++++++-------------- 1 file changed, 30 insertions(+), 24 deletions(-) diff --git a/training/minigo/convert_minigo.py b/training/minigo/convert_minigo.py index b23f05e37..14148454d 100755 --- a/training/minigo/convert_minigo.py +++ b/training/minigo/convert_minigo.py @@ -1,4 +1,5 @@ #!/usr/bin/env python3 +import gzip import re import os import sys @@ -172,13 +173,13 @@ def merge_gammas(weights): return out_weights def save_leelaz_weights(filename, weights): - with open(filename, 'w') as file: + with gzip.open(filename, 'wb') as f_out: # Version tag # Minigo outputs winrate from blacks point of view (same as ELF) - file.write('2') + f_out.write(b'2') for e, w in enumerate(weights): # Newline unless last line (single bias) - file.write('\n') + f_out.write(b'\n') work_weights = None if len(w.shape) == 4: # Convolution weights need a transpose @@ -208,31 +209,36 @@ def save_leelaz_weights(filename, weights): p = [0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15, 16, 17] work_weights = work_weights[:, p, :, :] - wt_str = ["{:0.8g}".format(wt) for wt in np.ravel(work_weights)] - 
file.write(' '.join(wt_str)) + # 80% of time is in this format line. + wt_str = ("{:0.4g}".format(wt) for wt in np.ravel(work_weights)) + f_out.write(' '.join(wt_str).encode()) -if len(sys.argv) < 2: - print('Model filename without extension needed as an argument.') - exit() -model = sys.argv[1] +def main(): + if len(sys.argv) < 2: + print('Model filename without extension needed as an argument.') + exit() -print ('loading ', model) -print () + model = sys.argv[1] -# Can be used for v9 or before models. -# weights = getMinigoWeightsV1(model) -weights = getMinigoWeightsV2(model) + print ('loading ', model) + print () -if 0: - for name, variables in [ - ('load_checkpoint', var_names.keys()), - # ('trainable_names', trainable_names), - # ('global_variable', [v.name for v in tf.global_variables()]) - ]: - print (name, len(variables)) - print (deduped(variables)) - print () + # Can be used for v9 or before models. + # weights = getMinigoWeightsV1(model) + weights = getMinigoWeightsV2(model) + if 0: + for name, variables in [ + ('load_checkpoint', var_names.keys()), + # ('trainable_names', trainable_names), + # ('global_variable', [v.name for v in tf.global_variables()]) + ]: + print (name, len(variables)) + print (deduped(variables)) + print () -save_leelaz_weights(model + '_converted.txt', merge_gammas(weights)) + save_leelaz_weights(model + '_converted.txt.gz', merge_gammas(weights)) + +if __name__ == "__main__": + main() From c01a0b5565527d438a33bf786274f9d0c8d99e57 Mon Sep 17 00:00:00 2001 From: Seth Troisi Date: Fri, 1 Feb 2019 02:23:55 -0800 Subject: [PATCH 25/45] Add gomill-explain_last_move. Add gomill-explain_last_move for additional output in ringmaster competitions. Pull request #2174. 
--- src/GTP.cpp | 6 +++++- src/UCTSearch.cpp | 30 +++++++++++++++++++++--------- src/UCTSearch.h | 4 +++- 3 files changed, 29 insertions(+), 11 deletions(-) diff --git a/src/GTP.cpp b/src/GTP.cpp index ccc599e1e..9f3ca846e 100644 --- a/src/GTP.cpp +++ b/src/GTP.cpp @@ -213,6 +213,7 @@ const std::string GTP::s_commands[] = { "lz-genmove_analyze", "lz-memory_report", "lz-setoption", + "gomill-explain_last_move", "" }; @@ -651,7 +652,7 @@ void GTP::execute(GameState & game, const std::string& xinput) { } while (game.get_passes() < 2 && !game.has_resigned()); return; - } else if (command.find("go") == 0) { + } else if (command.find("go") == 0 && command.size() < 6) { int move = search->think(game.get_to_move()); game.play_move(move); @@ -970,6 +971,9 @@ void GTP::execute(GameState & game, const std::string& xinput) { return; } else if (command.find("lz-setoption") == 0) { return execute_setoption(*search.get(), id, command); + } else if (command.find("gomill-explain_last_move") == 0) { + gtp_printf(id, "%s\n", search->explain_last_think().c_str()); + return; } gtp_fail_printf(id, "unknown command"); return; diff --git a/src/UCTSearch.cpp b/src/UCTSearch.cpp index 2f0157ca8..281a25bcc 100644 --- a/src/UCTSearch.cpp +++ b/src/UCTSearch.cpp @@ -30,6 +30,7 @@ #include "config.h" #include "UCTSearch.h" +#include #include #include #include @@ -575,18 +576,16 @@ std::string UCTSearch::get_pv(FastState & state, UCTNode& parent) { return res; } -void UCTSearch::dump_analysis(int playouts) { - if (cfg_quiet) { - return; - } +std::string UCTSearch::get_analysis() { + auto playouts = m_playouts.load(); FastState tempstate = m_rootstate; int color = tempstate.board.get_to_move(); - std::string pvstring = get_pv(tempstate, *m_root); + auto pvstring = get_pv(tempstate, *m_root); float winrate = 100.0f * m_root->get_raw_eval(color); - myprintf("Playouts: %d, Win: %5.2f%%, PV: %s\n", - playouts, winrate, pvstring.c_str()); + return str(boost::format("Playouts: %d, Win: %5.2f%%, PV: 
%s") + % playouts % winrate % pvstring.c_str()); } bool UCTSearch::is_running() const { @@ -746,9 +745,9 @@ int UCTSearch::think(int color, passflag_t passflag) { // output some stats every few seconds // check if we should still search - if (elapsed_centis - last_update > 250) { + if (!cfg_quiet && elapsed_centis - last_update > 250) { last_update = elapsed_centis; - dump_analysis(m_playouts.load()); + myprintf("%s\n", get_analysis().c_str()); } keeprunning = is_running(); keeprunning &= !stop_thinking(elapsed_centis, time_for_move); @@ -788,11 +787,24 @@ int UCTSearch::think(int color, passflag_t passflag) { (m_playouts * 100.0) / (elapsed_centis+1)); int bestmove = get_best_move(passflag); + // Save the explanation. + m_think_output = + str(boost::format("move %d, %c => %s\n%s") + % m_rootstate.get_movenum() + % (color == FastBoard::BLACK ? 'B' : 'W') + % m_rootstate.move_to_text(bestmove).c_str() + % get_analysis().c_str()); + // Copy the root state. Use to check for tree re-use in future calls. m_last_rootstate = std::make_unique(m_rootstate); return bestmove; } +// Brief output from last think() call. 
+std::string UCTSearch::explain_last_think() const { + return m_think_output; +} + void UCTSearch::ponder() { update_root(); diff --git a/src/UCTSearch.h b/src/UCTSearch.h index 739f74ac7..db4889a95 100644 --- a/src/UCTSearch.h +++ b/src/UCTSearch.h @@ -114,6 +114,7 @@ class UCTSearch { void ponder(); bool is_running() const; void increment_playouts(); + std::string explain_last_think() const; SearchResult play_simulation(GameState& currstate, UCTNode* const node); private: @@ -121,7 +122,7 @@ class UCTSearch { void dump_stats(FastState& state, UCTNode& parent); void tree_stats(const UCTNode& node); std::string get_pv(FastState& state, UCTNode& parent); - void dump_analysis(int playouts); + std::string get_analysis(); bool should_resign(passflag_t passflag, float besteval); bool have_alternate_moves(int elapsed_centis, int time_for_move); int est_playouts_left(int elapsed_centis, int time_for_move) const; @@ -141,6 +142,7 @@ class UCTSearch { std::atomic m_run{false}; int m_maxplayouts; int m_maxvisits; + std::string m_think_output; std::list m_delete_futures; From 671278303b1f5997e35e069449b0f5dd1da5545d Mon Sep 17 00:00:00 2001 From: betterworld Date: Fri, 1 Feb 2019 12:00:07 +0100 Subject: [PATCH 26/45] Add a feature to exclude moves from the search. * The "avoid" command is now a param for lz-analyze and for lz-genmove_analyze. New syntax is: `lz-analyze ARGS [avoid ] [avoid ...]` `lz-genmove_analyze ARGS [avoid ] [avoid ...]` The number_of_moves is now always relative to the current move number. Example: `lz-analyze b 200 avoid b q16 1 avoid b q4 1 avoid b d16 1 avoid b d4 1` * Re-organize the parser for the "analyze" commands. 
* New tag "interval"; old syntax "100" is now short for "interval 100" * Tags can be specified in any arbitrary order * Moved all of the parsing code for "lz-anaylze" and "lz-genmove_analyze" into the parse_analyze_tags function * parse_analyze_tags uses its return value instead of side effects * Implement the "allow" tag for lz-analyze. It works similar to "avoid". Adding moves to the "allow" list is the same as adding all other moves (except pass and resign) to the "avoid" list. * "Avoid" and "allow" moves can be specified as a comma-separated list. Example: `lz-analyze b 100 avoid w q4,q16,d4,d16 2 avoid b pass 50` Pull request #1949. --- src/FastState.cpp | 12 +- src/GTP.cpp | 308 +++++++++++++++++++++++++++++++++---------- src/GTP.h | 39 +++++- src/UCTNode.h | 2 +- src/UCTNodeRoot.cpp | 17 ++- src/UCTSearch.cpp | 29 ++-- src/tests/gtests.cpp | 112 ++++++++++++++++ 7 files changed, 433 insertions(+), 86 deletions(-) diff --git a/src/FastState.cpp b/src/FastState.cpp index ef4850d32..78f203cd5 100644 --- a/src/FastState.cpp +++ b/src/FastState.cpp @@ -37,6 +37,7 @@ #include "FastBoard.h" #include "Utils.h" #include "Zobrist.h" +#include "GTP.h" using namespace Utils; @@ -73,11 +74,12 @@ void FastState::reset_board() { } bool FastState::is_move_legal(int color, int vertex) const { - return vertex == FastBoard::PASS || - vertex == FastBoard::RESIGN || - (vertex != m_komove && - board.get_state(vertex) == FastBoard::EMPTY && - !board.is_suicide(vertex, color)); + return !cfg_analyze_tags.is_to_avoid(color, vertex, m_movenum) && ( + vertex == FastBoard::PASS || + vertex == FastBoard::RESIGN || + (vertex != m_komove && + board.get_state(vertex) == FastBoard::EMPTY && + !board.is_suicide(vertex, color))); } void FastState::play_move(int vertex) { diff --git a/src/GTP.cpp b/src/GTP.cpp index 9f3ca846e..83f4238b0 100644 --- a/src/GTP.cpp +++ b/src/GTP.cpp @@ -96,7 +96,192 @@ bool cfg_quiet; std::string cfg_options_str; bool cfg_benchmark; bool cfg_cpu_only; -int 
cfg_analyze_interval_centis; +AnalyzeTags cfg_analyze_tags; + +/* Parses tags for the lz-analyze GTP command and friends */ +AnalyzeTags::AnalyzeTags(std::istringstream& cmdstream, const GameState& game) { + std::string tag; + + /* Default color is the current one */ + m_who = game.board.get_to_move(); + + auto avoid_not_pass_resign_b = false, avoid_not_pass_resign_w = false; + auto allow_b = false, allow_w = false; + + while (true) { + cmdstream >> std::ws; + if (isdigit(cmdstream.peek())) { + tag = "interval"; + } else { + cmdstream >> tag; + if (cmdstream.fail() && cmdstream.eof()) { + /* Parsing complete */ + m_invalid = false; + return; + } + } + + if (tag == "avoid" || tag == "allow") { + std::string textcolor, textmoves; + size_t until_movenum; + cmdstream >> textcolor; + cmdstream >> textmoves; + cmdstream >> until_movenum; + if (cmdstream.fail()) { + return; + } + + std::vector moves; + std::istringstream movestream(textmoves); + while (!movestream.eof()) { + std::string textmove; + getline(movestream, textmove, ','); + auto sepidx = textmove.find_first_of(':'); + if (sepidx != std::string::npos) { + if (!(sepidx == 2 || sepidx == 3)) { + moves.clear(); + break; + } + auto move1_compressed = game.board.text_to_move( + textmove.substr(0, sepidx) + ); + auto move2_compressed = game.board.text_to_move( + textmove.substr(sepidx + 1) + ); + if (move1_compressed == FastBoard::NO_VERTEX || + move1_compressed == FastBoard::PASS || + move1_compressed == FastBoard::RESIGN || + move2_compressed == FastBoard::NO_VERTEX || + move2_compressed == FastBoard::PASS || + move2_compressed == FastBoard::RESIGN) + { + moves.clear(); + break; + } + auto move1_xy = game.board.get_xy(move1_compressed); + auto move2_xy = game.board.get_xy(move2_compressed); + auto xmin = std::min(move1_xy.first, move2_xy.first); + auto xmax = std::max(move1_xy.first, move2_xy.first); + auto ymin = std::min(move1_xy.second, move2_xy.second); + auto ymax = std::max(move1_xy.second, move2_xy.second); 
+ for (auto move_x = xmin; move_x <= xmax; move_x++) { + for (auto move_y = ymin; move_y <= ymax; move_y++) { + moves.push_back(game.board.get_vertex(move_x,move_y)); + } + } + } else { + auto move = game.board.text_to_move(textmove); + if (move == FastBoard::NO_VERTEX) { + moves.clear(); + break; + } + moves.push_back(move); + } + } + if (moves.empty()) { + return; + } + + int color; + if (textcolor == "w" || textcolor == "white") { + color = FastBoard::WHITE; + } else if (textcolor == "b" || textcolor == "black") { + color = FastBoard::BLACK; + } else { + return; + } + + if (until_movenum < 1) { + return; + } + until_movenum += game.get_movenum() - 1; + + for (const auto& move : moves) { + if (tag == "avoid") { + add_move_to_avoid(color, move, until_movenum); + if (move != FastBoard::PASS && move != FastBoard::RESIGN) { + if (color == FastBoard::BLACK) { + avoid_not_pass_resign_b = true; + } else { + avoid_not_pass_resign_w = true; + } + } + } else { + add_move_to_allow(color, move, until_movenum); + if (color == FastBoard::BLACK) { + allow_b = true; + } else { + allow_w = true; + } + } + } + if ((allow_b && avoid_not_pass_resign_b) || + (allow_w && avoid_not_pass_resign_w)) { + /* If "allow" is in use, it is illegal to use "avoid" with any + * move that is not "pass" or "resign". 
*/ + return; + } + } else if (tag == "w" || tag == "white") { + m_who = FastBoard::WHITE; + } else if (tag == "b" || tag == "black") { + m_who = FastBoard::BLACK; + } else if (tag == "interval") { + cmdstream >> m_interval_centis; + if (cmdstream.fail()) { + return; + } + } else { + return; + } + } +} + +void AnalyzeTags::add_move_to_avoid(int color, int vertex, size_t until_move) { + m_moves_to_avoid.emplace_back(color, until_move, vertex); +} + +void AnalyzeTags::add_move_to_allow(int color, int vertex, size_t until_move) { + m_moves_to_allow.emplace_back(color, until_move, vertex); +} + +int AnalyzeTags::interval_centis() const { + return m_interval_centis; +} + +int AnalyzeTags::invalid() const { + return m_invalid; +} + +int AnalyzeTags::who() const { + return m_who; +} + +bool AnalyzeTags::is_to_avoid(int color, int vertex, size_t movenum) const { + for (auto& move : m_moves_to_avoid) { + if (color == move.color && vertex == move.vertex && movenum <= move.until_move) { + return true; + } + } + if (vertex != FastBoard::PASS && vertex != FastBoard::RESIGN) { + auto active_allow = false; + for (auto& move : m_moves_to_allow) { + if (color == move.color && movenum <= move.until_move) { + active_allow = true; + if (vertex == move.vertex) { + return false; + } + } + } + if (active_allow) { + return true; + } + } + return false; +} + +bool AnalyzeTags::has_move_restrictions() const { + return !m_moves_to_avoid.empty() || !m_moves_to_allow.empty(); +} std::unique_ptr GTP::s_network; @@ -168,7 +353,7 @@ void GTP::setup_default_parameters() { cfg_cpu_only = false; #endif - cfg_analyze_interval_centis = 0; + cfg_analyze_tags = AnalyzeTags{}; // C++11 doesn't guarantee *anything* about how random this is, // and in MinGW it isn't random at all. 
But we can mix it in, which @@ -421,19 +606,24 @@ void GTP::execute(GameState & game, const std::string& xinput) { } else if (command.find("genmove") == 0 || command.find("lz-genmove_analyze") == 0) { auto analysis_output = command.find("lz-genmove_analyze") == 0; - auto interval = 0; std::istringstream cmdstream(command); std::string tmp; - cmdstream >> tmp; // eat genmove - cmdstream >> tmp; - if (analysis_output) { - cmdstream >> interval; - } - if (!cmdstream.fail()) { - int who; + int who; + AnalyzeTags tags; + + if (analysis_output) { + tags = AnalyzeTags{cmdstream, game}; + if (tags.invalid()) { + gtp_fail_printf(id, "cannot parse analyze tags"); + return; + } + who = tags.who(); + } else { + /* genmove command */ + cmdstream >> tmp; if (tmp == "w" || tmp == "white") { who = FastBoard::WHITE; } else if (tmp == "b" || tmp == "black") { @@ -442,85 +632,63 @@ void GTP::execute(GameState & game, const std::string& xinput) { gtp_fail_printf(id, "syntax error"); return; } - if (analysis_output) { - // Start of multi-line response - cfg_analyze_interval_centis = interval; - if (id != -1) gtp_printf_raw("=%d\n", id); - else gtp_printf_raw("=\n"); - } - // start thinking - { - game.set_to_move(who); - // Outputs winrate and pvs for lz-genmove_analyze - int move = search->think(who); - game.play_move(move); + } - std::string vertex = game.move_to_text(move); - if (!analysis_output) { - gtp_printf(id, "%s", vertex.c_str()); - } else { - gtp_printf_raw("play %s\n", vertex.c_str()); - } - } - if (cfg_allow_pondering) { - // now start pondering - if (!game.has_resigned()) { - // Outputs winrate and pvs through gtp for lz-genmove_analyze - search->ponder(); - } + if (analysis_output) { + // Start of multi-line response + cfg_analyze_tags = tags; + if (id != -1) gtp_printf_raw("=%d\n", id); + else gtp_printf_raw("=\n"); + } + // start thinking + { + game.set_to_move(who); + // Outputs winrate and pvs for lz-genmove_analyze + int move = search->think(who); + 
game.play_move(move); + + std::string vertex = game.move_to_text(move); + if (!analysis_output) { + gtp_printf(id, "%s", vertex.c_str()); + } else { + gtp_printf_raw("play %s\n", vertex.c_str()); } - if (analysis_output) { - // Terminate multi-line response - gtp_printf_raw("\n"); + } + + if (cfg_allow_pondering) { + // now start pondering + if (!game.has_resigned()) { + // Outputs winrate and pvs through gtp for lz-genmove_analyze + search->ponder(); } - } else { - gtp_fail_printf(id, "syntax not understood"); } - analysis_output = false; + if (analysis_output) { + // Terminate multi-line response + gtp_printf_raw("\n"); + } + cfg_analyze_tags = {}; return; } else if (command.find("lz-analyze") == 0) { std::istringstream cmdstream(command); std::string tmp; - auto who = game.board.get_to_move(); cmdstream >> tmp; // eat lz-analyze - cmdstream >> tmp; // eat side to move or interval - if (!cmdstream.fail()) { - if (tmp == "w" || tmp == "white") { - who = FastBoard::WHITE; - } else if (tmp == "b" || tmp == "black") { - who = FastBoard::BLACK; - } else { - // Not side to move, must be interval - try { - cfg_analyze_interval_centis = std::stoi(tmp); - } catch(...) { - gtp_fail_printf(id, "syntax not understood"); - return; - } - } - if (tmp == "w" || tmp == "b" || tmp == "white" || tmp == "black") { - // We got a color, so the interval must come now. - int interval; - cmdstream >> interval; - if (!cmdstream.fail()) { - cfg_analyze_interval_centis = interval; - } else { - gtp_fail_printf(id, "syntax not understood"); - return; - } - } + AnalyzeTags tags{cmdstream, game}; + if (tags.invalid()) { + gtp_fail_printf(id, "cannot parse analyze tags"); + return; } // Start multi-line response. if (id != -1) gtp_printf_raw("=%d\n", id); else gtp_printf_raw("=\n"); // Now start pondering. 
if (!game.has_resigned()) { + cfg_analyze_tags = tags; // Outputs winrate and pvs through gtp - game.set_to_move(who); + game.set_to_move(tags.who()); search->ponder(); } - cfg_analyze_interval_centis = 0; + cfg_analyze_tags = {}; // Terminate multi-line response gtp_printf_raw("\n"); return; diff --git a/src/GTP.h b/src/GTP.h index 1cf4c6706..cbf2b6633 100644 --- a/src/GTP.h +++ b/src/GTP.h @@ -40,6 +40,43 @@ #include "GameState.h" #include "UCTSearch.h" +struct MoveToAvoid { + int color; + size_t until_move; + int vertex; + + MoveToAvoid(int color, size_t until_move, int vertex) + : color(color), until_move(until_move), vertex(vertex) + {} + + bool operator==(const MoveToAvoid other) const { + return color == other.color && + until_move == other.until_move && vertex == other.vertex; + } +}; + +class AnalyzeTags { + friend class LeelaTest; + +public: + AnalyzeTags() = default; + AnalyzeTags(std::istringstream& cmdstream, const GameState& game); + + void add_move_to_avoid(int color, int vertex, size_t until_move); + void add_move_to_allow(int color, int vertex, size_t until_move); + int interval_centis() const; + int invalid() const; + int who() const; + bool is_to_avoid(int color, int vertex, size_t movenum) const; + bool has_move_restrictions() const; + +private: + bool m_invalid{true}; + std::vector m_moves_to_avoid, m_moves_to_allow; + int m_interval_centis{0}; + int m_who{FastBoard::INVAL}; +}; + extern bool cfg_gtp_mode; extern bool cfg_allow_pondering; extern int cfg_num_threads; @@ -82,7 +119,7 @@ extern bool cfg_quiet; extern std::string cfg_options_str; extern bool cfg_benchmark; extern bool cfg_cpu_only; -extern int cfg_analyze_interval_centis; +extern AnalyzeTags cfg_analyze_tags; static constexpr size_t MiB = 1024LL * 1024LL; diff --git a/src/UCTNode.h b/src/UCTNode.h index ba08b2ef9..5c2913c5e 100644 --- a/src/UCTNode.h +++ b/src/UCTNode.h @@ -106,7 +106,7 @@ class UCTNode { float min_psa_ratio); double get_blackevals() const; void 
accumulate_eval(float eval); - void kill_superkos(const KoState& state); + void kill_superkos(const GameState& state); void dirichlet_noise(float epsilon, float alpha); // Note : This class is very size-sensitive as we are going to create diff --git a/src/UCTNodeRoot.cpp b/src/UCTNodeRoot.cpp index 883ac57ae..7cff20e68 100644 --- a/src/UCTNodeRoot.cpp +++ b/src/UCTNodeRoot.cpp @@ -59,7 +59,10 @@ UCTNode* UCTNode::get_first_child() const { return m_children.front().get(); } -void UCTNode::kill_superkos(const KoState& state) { +void UCTNode::kill_superkos(const GameState& state) { + UCTNodePointer *pass_child = nullptr; + size_t valid_count = 0; + for (auto& child : m_children) { auto move = child->get_move(); if (move != FastBoard::PASS) { @@ -70,9 +73,21 @@ void UCTNode::kill_superkos(const KoState& state) { // Don't delete nodes for now, just mark them invalid. child->invalidate(); } + } else { + pass_child = &child; + } + if (child->valid()) { + valid_count++; } } + if (valid_count > 1 && pass_child && + !state.is_move_legal(state.get_to_move(), FastBoard::PASS)) { + // Remove the PASS node according to "avoid" -- but only if there are + // other valid nodes left. + (*pass_child)->invalidate(); + } + // Now do the actual deletion. m_children.erase( std::remove_if(begin(m_children), end(m_children), diff --git a/src/UCTSearch.cpp b/src/UCTSearch.cpp index 281a25bcc..b42776c13 100644 --- a/src/UCTSearch.cpp +++ b/src/UCTSearch.cpp @@ -419,6 +419,10 @@ bool UCTSearch::should_resign(passflag_t passflag, float besteval) { } } + if (!m_rootstate.is_move_legal(color, FastBoard::RESIGN)) { + return false; + } + return true; } @@ -520,8 +524,10 @@ int UCTSearch::get_best_move(passflag_t passflag) { // We didn't consider passing. Should we have and // end the game immediately? + if (!m_rootstate.is_move_legal(color, FastBoard::PASS)) { + myprintf("Passing is forbidden, I'll play on.\n"); // do we lose by passing? 
- if (relative_score < 0.0f) { + } else if (relative_score < 0.0f) { myprintf("Passing loses, I'll play on.\n"); } else if (relative_score > 0.0f) { myprintf("Passing wins, I'll pass out.\n"); @@ -737,8 +743,8 @@ int UCTSearch::think(int color, passflag_t passflag) { Time elapsed; int elapsed_centis = Time::timediff_centis(start, elapsed); - if (cfg_analyze_interval_centis && - elapsed_centis - last_output > cfg_analyze_interval_centis) { + if (cfg_analyze_tags.interval_centis() && + elapsed_centis - last_output > cfg_analyze_tags.interval_centis()) { last_output = elapsed_centis; output_analysis(m_rootstate, *m_root); } @@ -755,7 +761,7 @@ int UCTSearch::think(int color, passflag_t passflag) { } while (keeprunning); // Make sure to post at least once. - if (cfg_analyze_interval_centis && last_output == 0) { + if (cfg_analyze_tags.interval_centis() && last_output == 0) { output_analysis(m_rootstate, *m_root); } @@ -806,6 +812,11 @@ std::string UCTSearch::explain_last_think() const { } void UCTSearch::ponder() { + auto disable_reuse = cfg_analyze_tags.has_move_restrictions(); + if (disable_reuse) { + m_last_rootstate.reset(nullptr); + } + update_root(); m_root->prepare_root_node(m_network, m_rootstate.board.get_to_move(), @@ -825,10 +836,10 @@ void UCTSearch::ponder() { if (result.valid()) { increment_playouts(); } - if (cfg_analyze_interval_centis) { + if (cfg_analyze_tags.interval_centis()) { Time elapsed; int elapsed_centis = Time::timediff_centis(start, elapsed); - if (elapsed_centis - last_output > cfg_analyze_interval_centis) { + if (elapsed_centis - last_output > cfg_analyze_tags.interval_centis()) { last_output = elapsed_centis; output_analysis(m_rootstate, *m_root); } @@ -838,7 +849,7 @@ void UCTSearch::ponder() { } while (!Utils::input_pending() && keeprunning); // Make sure to post at least once. 
- if (cfg_analyze_interval_centis && last_output == 0) { + if (cfg_analyze_tags.interval_centis() && last_output == 0) { output_analysis(m_rootstate, *m_root); } @@ -853,7 +864,9 @@ void UCTSearch::ponder() { myprintf("\n%d visits, %d nodes\n\n", m_root->get_visits(), m_nodes.load()); // Copy the root state. Use to check for tree re-use in future calls. - m_last_rootstate = std::make_unique(m_rootstate); + if (!disable_reuse) { + m_last_rootstate = std::make_unique(m_rootstate); + } } void UCTSearch::set_playout_limit(int playouts) { diff --git a/src/tests/gtests.cpp b/src/tests/gtests.cpp index bcd0d9558..95d9fe5e5 100644 --- a/src/tests/gtests.cpp +++ b/src/tests/gtests.cpp @@ -114,6 +114,8 @@ class LeelaTest: public ::testing::Test { return std::make_pair(testing::internal::GetCapturedStdout(), testing::internal::GetCapturedStderr()); } + void test_analyze_cmd(std::string cmd, bool valid, int who, int interval, + int avoidlen, int avoidcolor, int avoiduntil); private: std::unique_ptr m_gamestate; @@ -263,3 +265,113 @@ TEST_F(LeelaTest, TimeControl2) { expect_regex(result.second, "Black time: 00:02:00, 1 period\\(s\\) of 120 seconds left"); expect_regex(result.second, "White time: 00:02:00, 1 period\\(s\\) of 120 seconds left"); } + +void LeelaTest::test_analyze_cmd(std::string cmd, bool valid, int who, int interval, + int avoidlen, int avoidcolor, int avoiduntil) { + // std::cout << "testing " << cmd << std::endl; + // avoid_until checks against the absolute game move number, indexed from 0 + std::istringstream cmdstream(cmd); + auto maingame = get_gamestate(); + AnalyzeTags result{cmdstream, maingame}; + EXPECT_EQ(result.m_invalid, !valid); + if (!valid) return; + EXPECT_EQ(result.m_who, who); + EXPECT_EQ(result.m_interval_centis, interval); + EXPECT_EQ(result.m_moves_to_avoid.size(), avoidlen); + if (avoidlen) { + EXPECT_EQ(result.m_moves_to_avoid[0].color, avoidcolor); + EXPECT_EQ(result.m_moves_to_avoid[0].until_move, avoiduntil); + } +} + +// Test parsing 
the lz-analyze command line +TEST_F(LeelaTest, AnalyzeParse) { + gtp_execute("clear_board"); + + test_analyze_cmd("b 50", + true, FastBoard::BLACK, 50, 0, -1, -1); + test_analyze_cmd("50 b", + true, FastBoard::BLACK, 50, 0, -1, -1); + test_analyze_cmd("b interval 50", + true, FastBoard::BLACK, 50, 0, -1, -1); + test_analyze_cmd("interval 50 b", + true, FastBoard::BLACK, 50, 0, -1, -1); + test_analyze_cmd("b interval", + false, -1, -1, -1, -1, -1); + test_analyze_cmd("42 w", + true, FastBoard::WHITE, 42, 0, -1, -1); + test_analyze_cmd("1234", + true, FastBoard::BLACK, 1234, 0, -1, -1); + gtp_execute("play b q16"); + test_analyze_cmd("1234", + true, FastBoard::WHITE, 1234, 0, -1, -1); + test_analyze_cmd("b 100 avoid b k10 1", + true, FastBoard::BLACK, 100, 1, FastBoard::BLACK, 1); + test_analyze_cmd("b 100 avoid b k10 1 avoid b a1 1", + true, FastBoard::BLACK, 100, 2, FastBoard::BLACK, 1); + test_analyze_cmd("b 100 avoid w k10 8", + true, FastBoard::BLACK, 100, 1, FastBoard::WHITE, 8); + gtp_execute("play w q4"); + test_analyze_cmd("b 100 avoid b k10 8", + true, FastBoard::BLACK, 100, 1, FastBoard::BLACK, 9); + test_analyze_cmd("100 b avoid b k10 8", + true, FastBoard::BLACK, 100, 1, FastBoard::BLACK, 9); + test_analyze_cmd("b avoid b k10 8 100", + true, FastBoard::BLACK, 100, 1, FastBoard::BLACK, 9); + test_analyze_cmd("avoid b k10 8 100 b", + true, FastBoard::BLACK, 100, 1, FastBoard::BLACK, 9); + test_analyze_cmd("avoid b k10 8 100 w", + true, FastBoard::WHITE, 100, 1, FastBoard::BLACK, 9); + test_analyze_cmd("avoid b z10 8 100 w", + false, -1, -1, -1, -1, -1); + test_analyze_cmd("avoid b k10 8 100 w bogus", + false, -1, -1, -1, -1, -1); + test_analyze_cmd("avoid b k10 8 100 w avoid b pass 17", + true, FastBoard::WHITE, 100, 2, FastBoard::BLACK, 9); + test_analyze_cmd("avoid b k10 8 w avoid b pass 17", + true, FastBoard::WHITE, 0, 2, FastBoard::BLACK, 9); + + gtp_execute("clear_board"); + test_analyze_cmd("b avoid b a1 10 allow b t1 1", + false, -1, -1, -1, -1, 
-1); + test_analyze_cmd("b avoid w a1 10 allow b t1 1", + true, FastBoard::BLACK, 0, 1, FastBoard::WHITE, 9); + test_analyze_cmd("b avoid b pass 10 allow b t1 1", + true, FastBoard::BLACK, 0, 1, FastBoard::BLACK, 9); + test_analyze_cmd("b avoid b resign 10 allow b t1 1", + true, FastBoard::BLACK, 0, 1, FastBoard::BLACK, 9); + test_analyze_cmd("b avoid w c3,c4,d3,d4 2 avoid b pass 50", + true, FastBoard::BLACK, 0, 5, FastBoard::WHITE, 1); + test_analyze_cmd("b avoid w c3,c4,d3,d4, 2 avoid b pass 50", + false, -1, -1, -1, -1, -1); + + gtp_execute("clear_board"); + test_analyze_cmd("b avoid b q16 1", + true, FastBoard::BLACK, 0, 1, FastBoard::BLACK, 0); + test_analyze_cmd("b avoid b : 1", + false, -1, -1, -1, -1, -1); + test_analyze_cmd("b avoid b d4: 1", + false, -1, -1, -1, -1, -1); + test_analyze_cmd("b avoid b d14: 1", + false, -1, -1, -1, -1, -1); + test_analyze_cmd("b avoid b :e3 1", + false, -1, -1, -1, -1, -1); + test_analyze_cmd("b avoid b d:e3 1", + false, -1, -1, -1, -1, -1); + test_analyze_cmd("b avoid b q16:q16 20", + true, FastBoard::BLACK, 0, 1, FastBoard::BLACK, 19); + test_analyze_cmd("b avoid b q16:t19 1", + true, FastBoard::BLACK, 0, 16, FastBoard::BLACK, 0); + test_analyze_cmd("b avoid b t19:q16 1", + true, FastBoard::BLACK, 0, 16, FastBoard::BLACK, 0); + test_analyze_cmd("b avoid b t16:q19 1", + true, FastBoard::BLACK, 0, 16, FastBoard::BLACK, 0); + test_analyze_cmd("b avoid b q19:t16 1", + true, FastBoard::BLACK, 0, 16, FastBoard::BLACK, 0); + test_analyze_cmd("b avoid b a1:t19 1", + true, FastBoard::BLACK, 0, 361, FastBoard::BLACK, 0); + test_analyze_cmd("b avoid b a1:t19 1 avoid w pass 1 avoid w resign 1", + true, FastBoard::BLACK, 0, 363, FastBoard::BLACK, 0); + test_analyze_cmd("b avoid b a1:t19,pass,resign 1", + true, FastBoard::BLACK, 0, 363, FastBoard::BLACK, 0); +} From 51772f4775cb46577c9d1333762cf375bd43a12d Mon Sep 17 00:00:00 2001 From: Hersmunch Date: Fri, 1 Feb 2019 11:13:18 +0000 Subject: [PATCH 27/45] Removed --cpu-only option 
from USE_CPU_ONLY build. Generalized output displayed in cases where potentially referring to a CPU instead of or as well as a GPU. Pull request #2161. --- autogtp/Management.cpp | 2 +- autogtp/main.cpp | 6 +++--- src/Leela.cpp | 8 ++++++-- src/Network.cpp | 2 +- validation/main.cpp | 4 ++-- 5 files changed, 13 insertions(+), 9 deletions(-) diff --git a/autogtp/Management.cpp b/autogtp/Management.cpp index daa762fb4..f9c3874cb 100644 --- a/autogtp/Management.cpp +++ b/autogtp/Management.cpp @@ -128,7 +128,7 @@ void Management::giveAssignments() { myGpu = m_gpusList.at(gpu); } QTextStream(stdout) << "Starting thread " << game + 1 ; - QTextStream(stdout) << " on GPU " << gpu << endl; + QTextStream(stdout) << " on device " << gpu << endl; m_gamesThreads[thread_index] = new Worker(thread_index, myGpu, this); connect(m_gamesThreads[thread_index], &Worker::resultReady, diff --git a/autogtp/main.cpp b/autogtp/main.cpp index c3c3668e0..fc7a3c593 100644 --- a/autogtp/main.cpp +++ b/autogtp/main.cpp @@ -49,11 +49,11 @@ int main(int argc, char *argv[]) { QCommandLineOption gamesNumOption( {"g", "gamesNum"}, - "Play 'gamesNum' games on one GPU at the same time.", + "Play 'gamesNum' games on one device (GPU/CPU) at the same time.", "num", "1"); QCommandLineOption gpusOption( {"u", "gpus"}, - "Index of the GPU to use for multiple GPUs support.", + "Index of the device(s) to use for multiple devices support.", "num"); QCommandLineOption keepSgfOption( {"k", "keepSgf" }, @@ -119,7 +119,7 @@ int main(int argc, char *argv[]) { // Map streams QTextStream cerr(stderr, QIODevice::WriteOnly); cerr << "AutoGTP v" << AUTOGTP_VERSION << endl; - cerr << "Using " << gamesNum << " thread(s) for GPU(s)." << endl; + cerr << "Using " << gamesNum << " game thread(s) per device." << endl; if (parser.isSet(keepSgfOption)) { if (!QDir().mkpath(parser.value(keepSgfOption))) { cerr << "Couldn't create output directory for self-play SGF files!" 
diff --git a/src/Leela.cpp b/src/Leela.cpp index a2456a1a1..f95779478 100644 --- a/src/Leela.cpp +++ b/src/Leela.cpp @@ -93,10 +93,12 @@ static void parse_commandline(int argc, char *argv[]) { ("noponder", "Disable thinking on opponent's time.") ("benchmark", "Test network and exit. Default args:\n-v3200 --noponder " "-m0 -t1 -s1.") - ("cpu-only", "Use CPU-only implementation and do not use GPU.") +#ifndef USE_CPU_ONLY + ("cpu-only", "Use CPU-only implementation and do not use OpenCL device(s).") +#endif ; #ifdef USE_OPENCL - po::options_description gpu_desc("GPU options"); + po::options_description gpu_desc("OpenCL device options"); gpu_desc.add_options() ("gpu", po::value >(), "ID of the OpenCL device(s) to use (disables autodetection).") @@ -305,9 +307,11 @@ static void parse_commandline(int argc, char *argv[]) { cfg_dumbpass = true; } +#ifndef USE_CPU_ONLY if (vm.count("cpu-only")) { cfg_cpu_only = true; } +#endif if (vm.count("playouts")) { cfg_max_playouts = vm["playouts"].as(); diff --git a/src/Network.cpp b/src/Network.cpp index f8787391a..e9182bdf0 100644 --- a/src/Network.cpp +++ b/src/Network.cpp @@ -666,7 +666,7 @@ void Network::compare_net_outputs(const Netresult& data, error = std::sqrt(error); if (error > max_error || std::isnan(error)) { - printf("Error in OpenCL calculation: Update your GPU drivers " + printf("Error in OpenCL calculation: Update your device's OpenCL drivers " "or reduce the amount of games played simultaneously.\n"); throw std::runtime_error("OpenCL self-check mismatch."); } diff --git a/validation/main.cpp b/validation/main.cpp index 78a50619d..b6198753e 100644 --- a/validation/main.cpp +++ b/validation/main.cpp @@ -62,11 +62,11 @@ int main(int argc, char *argv[]) { "lower:upper", "0.0:35.0"); QCommandLineOption gamesNumOption( {"g", "gamesNum"}, - "Play 'gamesNum' games on one GPU at the same time.", + "Play 'gamesNum' games on one device (GPU/CPU) at the same time.", "num", "1"); QCommandLineOption gpusOption( {"u", "gpus"}, - 
"Index of the GPU to use for multiple GPUs support.", + "Index of the device(s) to use for multiple devices support.", "num"); QCommandLineOption keepSgfOption( {"k", "keepSgf" }, From 22394ede2a7bad1e6d18ca37d52a0f79cc55aa44 Mon Sep 17 00:00:00 2001 From: Junhee Yoo <33939814+ihavnoid@users.noreply.github.com> Date: Fri, 1 Feb 2019 22:46:00 +0900 Subject: [PATCH 28/45] Tensor Core support with PTX inline assembly. * Tensor core support for half precision * hgemm : Added m16n16k16/m32n8k16/m8n32k16 tuning Tuner will see which shaped multiplication is fastest. MDIMA represents the M dimension, NDIMB represents the N dimension. * tensorcore : Test m16n16k16 typs only for checking tensorcore availability It seems that there are cases where only m16n16k16 is supported. If other formats are not available they will be auto-disabled on tuning. Pull request #2049. --- src/OpenCL.cpp | 59 +++- src/OpenCL.h | 3 + src/Tuner.cpp | 189 ++++++++--- src/Tuner.h | 9 +- src/kernels/clblast/hgemm_tensorcore.opencl | 359 ++++++++++++++++++++ src/kernels/tensorcore_test.opencl | 35 ++ 6 files changed, 606 insertions(+), 48 deletions(-) create mode 100644 src/kernels/clblast/hgemm_tensorcore.opencl create mode 100644 src/kernels/tensorcore_test.opencl diff --git a/src/OpenCL.cpp b/src/OpenCL.cpp index 23c2bfd3d..2701e441c 100644 --- a/src/OpenCL.cpp +++ b/src/OpenCL.cpp @@ -70,6 +70,10 @@ const std::string sourceCode_common = #include "kernels/common.opencl" ; +static const std::string sourceCode_tensorcore_test = + #include "kernels/tensorcore_test.opencl" +; + static const std::string sourceCode_config = R"( #define BOARD_SIZE )" + std::to_string(BOARD_SIZE) + "\n#define NUM_INTERSECTIONS " + std::to_string(NUM_INTERSECTIONS) + @@ -86,10 +90,14 @@ static const std::string sourceCode_convolve3 = ; const std::string sourceCode_sgemm = + "#if TCE == 1\n" // Enable tensorcore + #include "kernels/clblast/hgemm_tensorcore.opencl" + "\n#else\n" // Use clblast #include 
"kernels/clblast/xgemm_part1.opencl" #include "kernels/clblast/xgemm_part2.opencl" #include "kernels/clblast/xgemm_part3.opencl" #include "kernels/clblast/xgemm_batched.opencl" + "\n#endif\n" ; template @@ -351,6 +359,10 @@ void OpenCL_Network::convolve3(OpenCLContext & opencl_context, auto vwn = m_opencl.m_sgemm_tuners.vwn; auto mdimc = m_opencl.m_sgemm_tuners.mdimc; auto ndimc = m_opencl.m_sgemm_tuners.ndimc; + auto tce = m_opencl.m_sgemm_tuners.tce; + auto mdima = m_opencl.m_sgemm_tuners.mdima; + auto ndimb = m_opencl.m_sgemm_tuners.ndimb; + auto wavefront_size = m_opencl.m_wavefront_size; assert(mwg != 0); @@ -405,6 +417,13 @@ void OpenCL_Network::convolve3(OpenCLContext & opencl_context, (n_ceil * ndimc) / nwg, cl::size_type(WINOGRAD_TILE)}; + // tensorcore implementation uses a different dimension + if (tce) { + local_sgemm = {32 * mdimc/mdima, ndimc/ndimb, 1}; + size_sgemm = {32 * m_ceil / mdima * mdimc / mwg, + n_ceil / ndimb * ndimc / nwg, + cl::size_type(WINOGRAD_TILE)}; + } queue.enqueueNDRangeKernel(sgemm_kernel, cl::NullRange, size_sgemm, local_sgemm); } catch (const cl::Error &e) { @@ -579,8 +598,12 @@ void OpenCL::process_tuners(std::string tuners) { auto kwg = false; auto ndimc = false; auto mdimc = false; + auto mdima = false; + auto ndimb = false; auto vwm = false; auto vwn = false; + auto tce = false; + while (ss >> buf) { found = buf.find("="); if (found == std::string::npos) { @@ -601,6 +624,14 @@ void OpenCL::process_tuners(std::string tuners) { m_sgemm_tuners.kwg = value; kwg = true; } + if (name == "-DMDIMA") { + m_sgemm_tuners.mdima = value; + mdima = true; + } + if (name == "-DNDIMB") { + m_sgemm_tuners.ndimb = value; + ndimb = true; + } if (name == "-DMDIMC") { m_sgemm_tuners.mdimc = value; mdimc = true; @@ -617,8 +648,12 @@ void OpenCL::process_tuners(std::string tuners) { m_sgemm_tuners.vwn = value; vwn = true; } + if (name == "-DTCE") { + m_sgemm_tuners.tce = value; + tce = true; + } } - if (!mwg || !nwg || !kwg || !mdimc || !ndimc || 
!vwm || !vwn) { + if (!mwg || !nwg || !kwg || !mdimc || !ndimc || !vwm || !vwn || !mdima || !ndimb) { std::cerr << "Missing tuner parameters"; if (!mwg) { std::cerr << " MWG"; } @@ -629,6 +664,12 @@ void OpenCL::process_tuners(std::string tuners) { if (!kwg) { std::cerr << " KWG"; } + if (!mdima) { + std::cerr << " MDIMA"; + } + if (!ndimb) { + std::cerr << " NDIMB"; + } if (!mdimc) { std::cerr << " MDIMC"; } @@ -641,6 +682,9 @@ void OpenCL::process_tuners(std::string tuners) { if (!vwn) { std::cerr << " VWN"; } + if (!tce) { + std::cerr << " TCE"; + } std::cerr << std::endl; std::exit(-1); } @@ -790,6 +834,15 @@ OpenCL::OpenCL(int gpu, bool silent) { } else { myprintf("No.\n"); } + + myprintf("Tensor Core support: "); + try { + cl::Program(m_context, sourceCode_tensorcore_test).build(m_cl_args.c_str()); + m_tensorcore = true; + myprintf("Yes.\n"); + } catch (...) { + myprintf("No.\n"); + } } template @@ -808,6 +861,10 @@ void OpenCL::initialize(const int channels) { } auto t = Tuner(*this, m_context, m_device); + if (m_tensorcore) { + t.enable_tensorcore(); + } + auto sgemm_tuners = t.load_sgemm_tuners(channels, WINOGRAD_P, channels, WINOGRAD_TILE); diff --git a/src/OpenCL.h b/src/OpenCL.h index 5c86f06bf..86249a426 100644 --- a/src/OpenCL.h +++ b/src/OpenCL.h @@ -214,13 +214,16 @@ class OpenCL { struct sgemm_tuners { size_t mwg, nwg, kwg; size_t vwm, vwn; + size_t mdima, ndimb; size_t mdimc, ndimc; + size_t tce; }; sgemm_tuners m_sgemm_tuners; size_t m_wavefront_size{0}; size_t m_max_workgroup_size{0}; std::vector m_max_workgroup_dims; bool m_fp16_compute{false}; + bool m_tensorcore{false}; bool m_init_ok{false}; }; diff --git a/src/Tuner.cpp b/src/Tuner.cpp index 5fee22cf3..c16a3b37d 100644 --- a/src/Tuner.cpp +++ b/src/Tuner.cpp @@ -134,33 +134,66 @@ static bool IsMultiple(const size_t a, const size_t b) { template bool Tuner::valid_config_sgemm(Parameters p, bool exhaustive) { - if (!IsMultiple(p["MWG"], p["MDIMC"]*p["VWM"])) { - return false; - } - if
(!IsMultiple(p["NWG"], p["NDIMC"]*p["VWN"])) { - return false; - } - if (!IsMultiple(p["MWG"], p["MDIMA"]*p["VWM"])) { - return false; - } - if (!IsMultiple(p["NWG"], p["NDIMB"]*p["VWN"])) { - return false; - } - if (!IsMultiple(p["KWG"], p["MDIMC"]*p["NDIMC"]/p["MDIMA"])) { - return false; - } - if (!IsMultiple(p["KWG"], p["MDIMC"]*p["NDIMC"]/p["NDIMB"])) { - return false; - } - // Extra restrictions for a fast tuning run - if (!exhaustive) { - if (p["MDIMC"] != p["MDIMA"]) { + if (p["TCE"] == 0) { + if (!IsMultiple(p["MWG"], p["MDIMC"]*p["VWM"])) { + return false; + } + if (!IsMultiple(p["NWG"], p["NDIMC"]*p["VWN"])) { + return false; + } + if (!IsMultiple(p["MWG"], p["MDIMA"]*p["VWM"])) { + return false; + } + if (!IsMultiple(p["NWG"], p["NDIMB"]*p["VWN"])) { + return false; + } + if (!IsMultiple(p["KWG"], p["MDIMC"]*p["NDIMC"]/p["MDIMA"])) { + return false; + } + if (!IsMultiple(p["KWG"], p["MDIMC"]*p["NDIMC"]/p["NDIMB"])) { + return false; + } + // Extra restrictions for a fast tuning run + if (!exhaustive) { + if (p["MDIMC"] != p["MDIMA"]) { + return false; + } + if (p["NDIMC"] != p["NDIMB"]) { + return false; + } + if (p["SA"] != p["SB"]) { + return false; + } + } + } else { + if (!m_use_tensorcore) { + return false; + } + + // In Tensor Core implementations, MDIMA and NDIMB represents the + // wmmv multiplication dimensions, that is, + // m16n16k16 / m32n8k16 / m8n32k16. Thus m * n is fixed to 256. + if (p["MDIMA"] * p["NDIMB"] != 256) { + return false; + } + if (p["MWG"] < p["MDIMC"]) { + return false; + } + if (p["NWG"] < p["NDIMC"]) { return false; } - if (p["NDIMC"] != p["NDIMB"]) { + if (p["MDIMC"] < p["MDIMA"]) { return false; } - if (p["SA"] != p["SB"]) { + if (p["NDIMC"] < p["NDIMB"]) { + return false; + } + // VWM / VWN has no meaning if we don't do SA / SB. 
+ // Only test VWM / VWN == 2 + if (p["SA"] == 0 && p["VWM"] != 2) { + return false; + } + if (p["SB"] == 0 && p["VWN"] != 2) { return false; } } @@ -260,8 +293,7 @@ static float compare_ref(std::vector &x, std::vector &ref, } template -std::string Tuner::tune_sgemm(const int m, const int n, const int k, - const int batch_size, const int runs) { +std::vector Tuner::build_valid_params() { auto opts = std::vector(); if (cfg_sgemm_exhaustive) { opts = { @@ -298,7 +330,76 @@ std::string Tuner::tune_sgemm(const int m, const int n, const int k, {"SB", {1}}, }; } + // Tensor Core options + auto topts = std::vector(); + if (cfg_sgemm_exhaustive) { + topts = { + {"MWG", {32, 64, 128, 256}}, + {"NWG", {8, 16, 32, 64}}, + {"KWG", {16, 32, 64}}, + {"MDIMC", {8, 16, 32, 64}}, + {"NDIMC", {8, 16, 32, 64}}, + {"MDIMA", {8, 16, 32}}, + {"NDIMB", {8, 16, 32}}, + {"KWI", {2}}, + {"VWM", {2, 4, 8}}, + {"VWN", {2, 4, 8}}, + {"STRM", {0}}, + {"STRN", {0}}, + {"SA", {0, 1}}, + {"SB", {0, 1}}, + }; + } else { + topts = { + {"MWG", {32, 64, 128}}, + {"NWG", {8, 16, 32}}, + {"KWG", {16, 32}}, + {"MDIMC", {8, 16, 32}}, + {"NDIMC", {8, 16, 32}}, + {"MDIMA", {8, 16, 32}}, + {"NDIMB", {8, 16, 32}}, + {"KWI", {2}}, + {"VWM", {2}}, + {"VWN", {2}}, + {"STRM", {0}}, + {"STRN", {0}}, + {"SA", {0}}, + {"SB", {0}}, + }; + } + + // Don't use thead Rng or determinism will depend + // on whether tuner ran. 
+ auto rng = Random{0}; + + auto valid_params = std::vector{}; + auto build_from = [this, &rng, &valid_params](std::vector & opts, int tce) { + auto cfgs = 1; + for (auto c = size_t{0}; c < opts.size(); c++) { + cfgs *= opts[c].second.size(); + } + for (auto i = 0; i < cfgs; i++) { + Parameters param = get_parameters_by_int(opts, i); + param["TCE"] = tce; + if (valid_config_sgemm(param, cfg_sgemm_exhaustive)) { + if (cfg_sgemm_exhaustive) { + if (rng.randfix<16>() != 0) { + continue; + } + } + valid_params.push_back(param); + } + } + }; + build_from(opts, 0); + build_from(topts, 1); + + return std::move(valid_params); +} +template +std::string Tuner::tune_sgemm(const int m, const int n, const int k, + const int batch_size, const int runs) { // This needs to be at minimum the maximum (MNK/WG) values above. auto m_max = std::max(64, m); auto n_max = std::max(64, n); @@ -335,26 +436,8 @@ std::string Tuner::tune_sgemm(const int m, const int n, const int k, myprintf("\nStarted OpenCL SGEMM tuner.\n"); - auto valid_params = std::vector{}; - auto cfgs = 1; - for (auto c = size_t{0}; c < opts.size(); c++) { - cfgs *= opts[c].second.size(); - } - - // Don't use thead Rng or determism will depend on if tuner ran. 
- auto rng = Random{0}; + auto valid_params = build_valid_params(); - for (auto i = 0; i < cfgs; i++) { - Parameters param = get_parameters_by_int(opts, i); - if (valid_config_sgemm(param, cfg_sgemm_exhaustive)) { - if (cfg_sgemm_exhaustive) { - if (rng.randfix<16>() != 0) { - continue; - } - } - valid_params.emplace_back(i); - } - } myprintf("Will try %zu valid configurations.\n", valid_params.size()); std::string best_params; @@ -375,10 +458,9 @@ std::string Tuner::tune_sgemm(const int m, const int n, const int k, auto failed_enqueue = 0; auto failed_error = 0; - for (const auto& i : valid_params) { + for (auto & p : valid_params) { param_counter++; - auto p = get_parameters_by_int(opts, i); auto defines = parameters_to_defines(p); try { @@ -426,6 +508,13 @@ std::string Tuner::tune_sgemm(const int m, const int n, const int k, cl::NDRange size_sgemm = {(m_ceil * p["MDIMC"]) / p["MWG"], (n_ceil * p["NDIMC"]) / p["NWG"], size_t(batch_size)}; + // Tensor Core implementation uses a different dimension. 
+ if (p["TCE"]) { + local_sgemm = {32 * p["MDIMC"] / p["MDIMA"], p["NDIMC"] / p["NDIMB"], 1}; + size_sgemm = {32 * m_ceil / p["MDIMA"] * p["MDIMC"] / p["MWG"], + n_ceil / p["NDIMB"] * p["NDIMC"] / p["NWG"], + size_t(batch_size)}; + } auto sum = 0.0f; auto error = 0.0f; @@ -625,6 +714,14 @@ std::string Tuner::load_sgemm_tuners(const int m, const int n, const int return tuners; } +template +void Tuner::enable_tensorcore() {} + +template <> +void Tuner::enable_tensorcore() { + m_use_tensorcore = true; +} + template class Tuner; #ifdef USE_HALF template class Tuner; diff --git a/src/Tuner.h b/src/Tuner.h index 30e230fe5..3297a1a18 100644 --- a/src/Tuner.h +++ b/src/Tuner.h @@ -45,6 +45,7 @@ class Tuner { OpenCL & m_opencl; cl::Context m_context; cl::Device m_device; + bool m_use_tensorcore = false; public: std::string tune_sgemm(const int m, const int n, const int k, const int batch_size, const int runs = 4); @@ -55,9 +56,14 @@ class Tuner { // This is to prevent the same device from being tuned multiple times. static std::vector tuned_devices; - static constexpr auto TUNER_VERSION = 0; + // version 0 : Initial release + // version 1 : Tuner with additional tensor cores (parameter TCE) + static constexpr auto TUNER_VERSION = 1; + Tuner(OpenCL & opencl, cl::Context context, cl::Device device) : m_opencl(opencl), m_context(context), m_device(device) {} + + void enable_tensorcore(); private: void store_sgemm_tuners(const int m, const int n, const int k, const int batch_size, std::string tuners); @@ -69,6 +75,7 @@ class Tuner { std::string sgemm_tuners_from_line(std::string line, const int m, const int n, const int k, const int batch_size); + std::vector build_valid_params(); }; #endif diff --git a/src/kernels/clblast/hgemm_tensorcore.opencl b/src/kernels/clblast/hgemm_tensorcore.opencl new file mode 100644 index 000000000..cc5c08eeb --- /dev/null +++ b/src/kernels/clblast/hgemm_tensorcore.opencl @@ -0,0 +1,359 @@ +/* + This file is part of Leela Zero. 
+ Copyright (C) 2017-2018 Junhee Yoo and contributors + + Leela Zero is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Leela Zero is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Leela Zero. If not, see . +*/ + + +// This is the tensor core implementation of XgemmBatched. Can only be used on +// GPUs with NVIDIA's Volta / Turing architectures with wmmv instructions. + +// Enables loading of this file using the C++ pre-processor's #include (C++11 standard raw string +// literal). Comment-out this line for syntax-highlighting when developing. + +R"( +#define USE_TC + +#ifndef SA +#define SA 1 +#endif + +#ifndef SB +#define SB 1 +#endif + +#ifndef VWM +#define VWM 4 +#endif + +#ifndef VWN +#define VWN 2 +#endif + +#if VWM == 1 +#define vstoreM vstore +#define vloadM vload +#elif VWM == 2 +#define vstoreM vstore2 +#define vloadM vload2 +#elif VWM == 4 +#define vstoreM vstore4 +#define vloadM vload4 +#elif VWM == 8 +#define vstoreM vstore8 +#define vloadM vload8 +#elif VWM == 16 +#define vstoreM vstore16 +#define vloadM vload16 +#endif + +#if VWN == 1 +#define vstoreN vstore +#define vloadN vload +#elif VWN == 2 +#define vstoreN vstore2 +#define vloadN vload2 +#elif VWN == 4 +#define vstoreN vstore4 +#define vloadN vload4 +#elif VWN == 8 +#define vstoreN vstore8 +#define vloadN vload8 +#elif VWN == 16 +#define vstoreN vstore16 +#define vloadN vload16 +#endif + +#define WARP_SIZE 32 + +#if MDIMA == 32 && NDIMB == 8 +#define WMMA_SHAPE "m32n8k16" +#elif MDIMA == 16 && NDIMB == 16 +#define WMMA_SHAPE "m16n16k16" +#elif MDIMA == 8 && 
NDIMB == 32 +#define WMMA_SHAPE "m8n32k16" +#else +#error Unsupported MDIMA / NDIMB combination +#endif + + +void GlobalToLocalA(int tid, int stride, __local short * alm, __global short * agm) +{ + const int copy_size = KWG * MWG; + const int dest_stride = MWG; + const int num_threads = MDIMC * NDIMC * WARP_SIZE / (MDIMA * NDIMB); + +#pragma unroll + for(int i=tid * VWM; i < copy_size; i += num_threads * VWM) { + int x = i % dest_stride; + int y = i / dest_stride; + + vstoreM( vloadM((y * stride + x) / VWM, agm), i / VWM, alm); + } +} + + +void GlobalToLocalB(int tid, int stride, __local short * blm, __global short * bgm) +{ + const int copy_size = KWG * NWG; + const int dest_stride = NWG; + const int num_threads = MDIMC * NDIMC * WARP_SIZE / (MDIMA * NDIMB); +#pragma unroll + for(int i=tid * VWN; i < copy_size; i += num_threads * VWN) { + int x = i % dest_stride; + int y = i / dest_stride; + vstoreN( vloadN((y * stride + x) / VWN, bgm), i / VWN, blm); + } +} + + +void HgemmBody(const int kSizeM, const int kSizeN, const int kSizeK, + #if SA == 1 + __local short* alm, + #endif + #if SB == 1 + __local short* blm, + #endif + const __global half* restrict agm, + const __global half* restrict bgm, + __global half* restrict cgm) +{ + int laneid; + asm("mov.u32 %0, %%laneid;" : "=r"(laneid)); + + // the base location of the MDIMA * NDIMB tile number this thread is responsible of + int tile_m = get_global_id(0) / WARP_SIZE * MWG / MDIMC; + int tile_n = get_global_id(1) * NWG / NDIMC; + + // the base pointers of agm, bgm and cgm + const __global half * agm_ = agm + MDIMA * tile_m; + const __global half * bgm_ = bgm + NDIMB * tile_n; + __global half * cgm_ = cgm + kSizeM * NDIMB * tile_n + MDIMA * tile_m; + + // the (m,n) position within the warp + int offset_number = laneid; + int offset_m = offset_number % (MDIMA/2); + int offset_n = offset_number / (MDIMA/2); + + if(laneid != get_global_id(0) % WARP_SIZE) { + // this is just to make sure we crash ourselves if the basic 
assumption doesn't hold + return; + } + + int k, m, n, mb, nb, kb, kwg; +#ifdef USE_TC + int zero_pair; + asm("{\n" + ".reg .b16 xh;\n" + ".reg .b32 x;\n" + "mov.f32 x, 0.0;\n" + "cvt.rz.f16.f32 xh, x;\n" + "mov.b32 %0, {xh,xh};\n" + "}": "=r"(zero_pair) + ); + +#pragma promote_to_registers + int c0[MWG/MDIMC][NWG/NDIMC]; +#pragma promote_to_registers + int c1[MWG/MDIMC][NWG/NDIMC]; +#pragma promote_to_registers + int c2[MWG/MDIMC][NWG/NDIMC]; +#pragma promote_to_registers + int c3[MWG/MDIMC][NWG/NDIMC]; + #pragma unroll + for(mb = 0; mb < MWG / MDIMC; mb += 1) { + #pragma unroll + for(nb = 0; nb < NWG / NDIMC; nb += 1) { + c0[mb][nb] = zero_pair; + c1[mb][nb] = zero_pair; + c2[mb][nb] = zero_pair; + c3[mb][nb] = zero_pair; + } + } +#else + float acc[MWG/MDIMC][NWG/NDIMC][2][4]; + for(mb = 0; mb < MWG / MDIMC; mb += 1) { + for(nb = 0; nb < NWG / NDIMC; nb += 1) { + for(m=0; m<2; m++) { + for(int n=0; n<4; n++) { + acc[mb][nb][m][n] = 0.0f; + } + } + } + } +#endif + for(kwg = 0; kwg < kSizeK; kwg += KWG) { +#if SA == 1 + GlobalToLocalA(get_local_id(0) + get_local_id(1) * WARP_SIZE * MDIMC / MDIMA, kSizeM, + alm, + (__global short *)(agm + get_group_id(0) * MWG + kwg * kSizeM) + ); +#endif + +#if SB == 1 + GlobalToLocalB(get_local_id(0) + get_local_id(1) * WARP_SIZE * MDIMC / MDIMA, kSizeN, + blm, + (__global short *)(bgm + get_group_id(1) * NWG + kwg * kSizeN) + ); + +#endif + +#if SA == 1 || SB == 1 + barrier(CLK_LOCAL_MEM_FENCE); +#endif + +#pragma unroll + for(kb = 0; kb < KWG; kb += 16) { +#pragma unroll + for(mb = 0; mb < MWG / MDIMC; mb += 1) { +#pragma unroll + for(nb = 0; nb < NWG / NDIMC; nb += 1) { +#if SA == 1 + const int block_loc_m = (get_local_id(0)/WARP_SIZE) % (MDIMC/MDIMA); + const int agm_stride = MWG; + const __local half * b_agm_ = (const __local half *)(alm + (mb + block_loc_m * (MWG/MDIMC)) * MDIMA); + const __local half * bb_agm_ = b_agm_ + agm_stride * kb; +#else + const int agm_stride = kSizeM; + const __global half * b_agm_ = agm_ + mb * 
MDIMA; + const __global half * bb_agm_ = b_agm_ + kSizeM * (kb + kwg); +#endif + +#if SB == 1 + const int block_loc_n = (get_local_id(1)) % (NDIMC/NDIMB); + const int bgm_stride = NWG; + const __local half * b_bgm_ = (const __local half *)(blm + (nb + block_loc_n * (NWG/NDIMC)) * NDIMB); + const __local half * bb_bgm_ = b_bgm_ + bgm_stride * kb; +#else + const int bgm_stride = kSizeN; + const __global half * b_bgm_ = bgm_ + nb * NDIMB; + const __global half * bb_bgm_ = b_bgm_ + kSizeN * (kb + kwg); +#endif +#ifdef USE_TC + int d0_, d1_, d2_, d3_; + int c0_ = c0[mb][nb]; + int c1_ = c1[mb][nb]; + int c2_ = c2[mb][nb]; + int c3_ = c3[mb][nb]; + asm("{\n" + ".reg .b32 a0, a1, a2, a3, a4, a5, a6, a7;\n" + ".reg .b32 b0, b1, b2, b3, b4, b5, b6, b7;\n" +#if SA == 1 + "wmma.load.a.sync.aligned." WMMA_SHAPE ".shared.col.f16 {a0,a1,a2,a3,a4,a5,a6,a7}, [%4], %6;\n" +#else + "wmma.load.a.sync.aligned." WMMA_SHAPE ".col.f16 {a0,a1,a2,a3,a4,a5,a6,a7}, [%4], %6;\n" +#endif +#if SB == 1 + "wmma.load.b.sync.aligned." WMMA_SHAPE ".shared.row.f16 {b0,b1,b2,b3,b4,b5,b6,b7}, [%5], %7;\n" +#else + "wmma.load.b.sync.aligned." WMMA_SHAPE ".row.f16 {b0,b1,b2,b3,b4,b5,b6,b7}, [%5], %7;\n" +#endif + "wmma.mma.sync.aligned.col.row." 
WMMA_SHAPE ".f16.f16 " + " {%0,%1,%2,%3},\n" + " {a0,a1,a2,a3,a4,a5,a6,a7},\n" + " {b0,b1,b2,b3,b4,b5,b6,b7},\n" + " {%8,%9,%10,%11};\n" + "}": "=r"(d0_), "=r"(d1_), "=r"(d2_), "=r"(d3_) : "l"(bb_agm_), "l"(bb_bgm_), "r"(agm_stride), "r"(bgm_stride), "r"(c0_), "r"(c1_), "r"(c2_), "r"(c3_)); + c0[mb][nb] = d0_; + c1[mb][nb] = d1_; + c2[mb][nb] = d2_; + c3[mb][nb] = d3_; +#else + for(m = offset_m; m < MDIMA; m += MDIMA/2) { + for(n = offset_n; n < NDIMB; n += NDIMB/4) { + float a = 0.0f; + for(k = 0; k < 16; k++) { + a += vload_half(agm_stride * k + m, bb_agm_) * vload_half(bgm_stride * k + n, bb_bgm_); + } + acc[mb][nb][m/(MDIMA/2)][n/(NDIMB/4)] += a; + } + } +#endif + } + } + } + } + +#ifdef USE_TC +#pragma unroll + for(mb = 0; mb < MWG / MDIMC; mb += 1) { +#pragma unroll + for(nb = 0; nb < NWG / NDIMC; nb += 1) { + int c0_ = c0[mb][nb]; + int c1_ = c1[mb][nb]; + int c2_ = c2[mb][nb]; + int c3_ = c3[mb][nb]; + __global half * b_cgm_ = cgm_ + kSizeM * nb * NDIMB + mb * MDIMA; + asm("{\n" + "wmma.store.d.sync.aligned.col." 
WMMA_SHAPE ".f16 [%4], {%0,%1,%2,%3}, %5;" + "}" : : "r"(c0_), "r"(c1_), "r"(c2_), "r"(c3_), "l"(b_cgm_), "r"(kSizeM)); + } + } +#else + for(mb = 0; mb < MWG / MDIMC; mb += 1) { + for(nb = 0; nb < NWG / NDIMC; nb += 1) { + for(m = offset_m; m < MDIMA; m += MDIMA/2) { + for(n = offset_n; n < NDIMB; n += NDIMB/4) { + vstore_half(acc[mb][nb][m/(MDIMA/2)][n/(NDIMB/4)], kSizeM * (nb * NDIMB + n) + mb * MDIMA + m, cgm_); + } + } + } + } +#endif +} + +struct alm_t {short alm[KWG * MWG];} __attribute__((aligned(32))); +struct blm_t {short blm[KWG * NWG];} __attribute__((aligned(32))); + +__kernel __attribute__((reqd_work_group_size(32*MDIMC/MDIMA, NDIMC/NDIMB, 1))) +void XgemmBatched(const int kSizeM, const int kSizeN, const int kSizeK, + const __global half* restrict agm, + const __global half* restrict bgm, + __global half* restrict cgm) +{ + // Sets the offsets + const int batch = get_group_id(2); + const int a_offset = kSizeM*kSizeK*batch; + const int b_offset = kSizeK*kSizeN*batch; + const int c_offset = kSizeM*kSizeN*batch; + + const __global half* restrict agm_ = &agm[a_offset]; + const __global half* restrict bgm_ = &bgm[b_offset]; + __global half* restrict cgm_ = &cgm[c_offset]; + + // Allocates workgroup-private memory (local memory) + #if SA == 1 + __local struct alm_t alm; + #endif + #if SB == 1 + __local struct blm_t blm; + #endif + + #if SA == 1 && SB == 1 + HgemmBody(kSizeM, kSizeN, kSizeK, alm.alm, blm.blm, agm_, bgm_, cgm_); + #elif SA == 1 + HgemmBody(kSizeM, kSizeN, kSizeK, alm.alm, agm_, bgm_, cgm_); + #elif SB == 1 + HgemmBody(kSizeM, kSizeN, kSizeK, blm.blm, agm_, bgm_, cgm_); + #else + HgemmBody(kSizeM, kSizeN, kSizeK, agm_, bgm_, cgm_); + #endif +} + +// ================================================================================================= + +// End of the C++11 raw string literal +)" +// ================================================================================================= diff --git a/src/kernels/tensorcore_test.opencl 
b/src/kernels/tensorcore_test.opencl new file mode 100644 index 000000000..3a05a2f81 --- /dev/null +++ b/src/kernels/tensorcore_test.opencl @@ -0,0 +1,35 @@ +/* + This file is part of Leela Zero. + Copyright (C) 2017-2018 Gian-Carlo Pascutto and contributors + + Leela Zero is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Leela Zero is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Leela Zero. If not, see . +*/ + + +// This kernel simply tests if the host can compile a wmma instruction. +// Not intended to be run at all. + +// Enables loading of this file using the C++ pre-processor's #include (C++11 standard raw string +// literal). Comment-out this line for syntax-highlighting when developing. +R"( + +__kernel void tensorcore_test(__global int * ptr) { + asm( + ".reg .b32 a0, a1, a2, a3, a4, a5, a6, a7;\n" + "wmma.load.a.sync.aligned.m16n16k16.shared.row.f16 {a0,a1,a2,a3,a4,a5,a6,a7}, [%0];\n" : : "l"(ptr) + ); +} + +// End of the C++11 raw string literal
+)" From 1bf574448a96d1fc3356beb1af513f5e4de3007b Mon Sep 17 00:00:00 2001 From: Gian-Carlo Pascutto Date: Fri, 1 Feb 2019 14:54:29 +0100 Subject: [PATCH 29/45] Update TODO list. We support avoid tags now. Clarify batching work needs changes in the search. --- README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/README.md b/README.md index e75eca350..7ae32e0c2 100644 --- a/README.md +++ b/README.md @@ -324,8 +324,7 @@ If interrupted, training can be resumed with: # Todo - [ ] Further optimize Winograd transformations. -- [ ] Implement GPU batching. 
-- [ ] GTP extension to exclude moves from analysis. +- [ ] Implement GPU batching in the search. - [ ] Root filtering for handicap play. - More backends: - [ ] MKL-DNN based backend. From 60528d6d7fb3fed663d46501b1f60d24e0449ac9 Mon Sep 17 00:00:00 2001 From: Gian-Carlo Pascutto Date: Fri, 1 Feb 2019 15:23:27 +0100 Subject: [PATCH 30/45] Remove an unnecessary std::move(). Which inhibits RVO. See e.g. https://stackoverflow.com/a/19272035 --- src/Tuner.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Tuner.cpp b/src/Tuner.cpp index c16a3b37d..6bf609b01 100644 --- a/src/Tuner.cpp +++ b/src/Tuner.cpp @@ -394,7 +394,7 @@ std::vector Tuner::build_valid_params() { build_from(opts, 0); build_from(topts, 1); - return std::move(valid_params); + return valid_params; } template From ad4fd03d69fabad5c3909234f0c94be3f75be612 Mon Sep 17 00:00:00 2001 From: Gian-Carlo Pascutto Date: Mon, 4 Feb 2019 09:25:16 +0100 Subject: [PATCH 31/45] Add contributor (and maintainer) guidelines. * Add contributor (and maintainer) guidelines. Spell out the existing code style, C++ usage, git workflow, commit message requirements, and give guidelines regarding reviewing, merging and adding configuration options and GTP extensions. Pull request #2186. --- CONTRIBUTING.md | 109 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 109 insertions(+) create mode 100644 CONTRIBUTING.md diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 000000000..95cf49aea --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,109 @@ +# Contributing to Leela Zero + +## C++ Usage + +Leela Zero is written in C++14, and generally encourages writing in modern C++ style. + +This means that: + +* The code overwhelmingly uses Almost Always Auto style, and so should you. +* Prefer range based for and non-member (c)begin/(c)end. +* You can rely on boost 1.58.0 or later being present. +* Manipulation of raw pointers is to be avoided as much as possible. 
+* Prefer constexpr over defines or constants. +* Prefer "using" over typedefs. +* Prefer uniform initialization. +* Prefer default initializers for member variables. +* Aim for const-correctness. Prefer passing non-trivial parameters by const reference. +* Use header include guards, not #pragma once (pragma once is non-standard, has issues with detecting identical files, and is slower https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58770) +* config.h is always the first file included. +* Feel free to use templates, but remember that debugging obscure template metaprogramming bugs is not something people enjoy doing in their spare time. +* Using exceptions is allowed. + +## Code Style + +* Look at the surrounding code and the rest of the project! +* Indentation is 4 spaces. No tabs. +* public/private/protected access modifiers are de-indented +* Maximum line length is 80 characters. There are rare exceptions in the code, usually involving user-visible text strings. +* Ifs are always braced, with very rare exceptions when everything fits on one line and doing it properly makes the code less readable. +* The code generally avoids any pointer passing and allows non-const references for parameters. Still, for new code it should be preferred to a) put input parameters first b) use return values over output parameters. +* Function arguments that wrap are aligned. +* Member variables in a class have an m_ prefix and are private. Members of POD structs don't and aren't. +* Constants and enum values are ALLCAPS. +* Variables are lowercase. +* Function names are underscore_case. +* Classes are CamelCase. +* Comments are preferably full sentences with proper capitalization and a period. +* Split the includes list into config.h, standard headers and our headers. + +If something is not addressed here or there is no similar code, the Google C++ Style Guide is always a good reference. + +We might move to enforce clang-format at some point. 
+
+## Adding dependencies
+
+C++ does not quite have the package systems JavaScript and Rust have, so some restraint should be exercised when adding dependencies. Dependencies typically complicate the build for new contributors, especially on Windows, and reliance on specific, new versions can be a nuisance on Unix based systems.
+
+The restraints on modern header-only libraries are significantly less because they avoid most of the above problems.
+
+If a library is not mature and well-supported on Windows, Linux *and* macOS, you do not want it.
+
+This is not an excuse to re-invent the wheel.
+
+## Upgrading dependencies
+
+The code and dependencies should target the latest stable versions of Visual Studio/MSVC, and the latest stable/LTS releases of common Linux distros, with some additional delay as not everyone will be able to upgrade to a new stable/LTS right away.
+
+For example, upgrading to C++17 or boost 1.62.0 (oldest version in a Debian stable or Ubuntu LTS release) can be considered if there's a compelling use case and/or we can confirm it is supported on all platforms we reasonably target.
+
+## Merging contributions
+
+Contributions come in the form of pull requests against the "next" branch.
+
+They are rebased or squashed on top of the next branch, so the history will stay linear, i.e. no merge commits.
+
+Commit messages follow Linux kernel style: a summary phrase that is no more than 70-75 characters (but preferably <50) and describes both what the patch changes, as well as why the patch might be necessary.
+
+If the patch is to a specific subsystem (AutoGTP, Validation, ...) then prefix the summary by that subsystem (e.g. AutoGTP: ...).
+
+This is followed by a blank line, and a description that is wrapped at 72 characters. Good patch descriptions can be large time savers when someone has to bugfix the code afterwards.
+
+The end of the commit message should mention which (github) issue the patch fixes, if any, and the pull request it belongs to. 
+ +Patches need to be reviewed before merging. Try to find the person who worked on the code last, or who has done work in nearby code (git blame is your friend, and this is why we write proper commit messages...). With some luck that is someone with write access to the repository. If not, you'll have to ping someone who does. + +Experience says that the majority of the pull requests won't live up to this ideal, which means that maintainers will have to squash patch series and clean up the commit message to be coherent before merging. + +If you are a person with write access to the repo, and are about to merge a commit, ask yourself the following question: am I confident enough that I understand this code, so that I can and am willing to go in and fix it if it turns out to be necessary? If the answer to this question is no, then do not merge the code. Not merging a contribution (quickly) is annoying for the individual contributor. Merging a bad contribution is annoying for everyone who wants to contribute now and in the future. + +If a contributor can't be bothered to fix up the trailing whitespace in their patch, odds are they aren't going to be willing to fix the threading bug it introduces either. + +## "Improvements" and Automagic + +Improvements to the engine that can affect strength should include supporting data. This means no-regression tests for functional changes, and a proof of strength improvement for things which are supposed to increase strength. + +The tools in the validation directory are well-fit for this purpose, as +is the python tool "ringmaster". + +The number of configurable options should be limited where possible. If it is not possible for the author to make rules of thumb for suitable values for those options, then the majority of users have no hope of getting them right, and may mistakenly make the engine weaker. 
If you must introduce new ones, consider limiting their exposure to developers only via USE_TUNER and set a good default for them.
+
+## GTP Extensions
+
+GTP makes it possible to connect arbitrary engines to arbitrary interfaces.
+
+Unfortunately GTP 2 isn't extensive enough to realistically fit all needs of analysis GUIs, which means we have had to extend it. The lack of standardization here means that Go software is continuously catching up to the chess world, especially after UCI was introduced. We should aim to make this situation better, not worse.
+
+This means that extensions have the possibility of outliving Leela Zero (or any GUIs) provided they are well thought out.
+
+It makes sense to be thoughtful here, consider the responsibilities of both GUI and engine, and try to come up with flexible building blocks rather than a plethora of commands for very specific use cases.
+
+Experience and previous discussions can help understanding:
+
+* lz-analyze "avoid" and "allow" were added in pull request #1949.
+* lz-analyze got a side-to-move option in pull request #1872 and #1642.
+* lz-analyze got a "prior" tag in pull request #1836.
+* lz-analyze was added in pull request #1388.
+* lz-setoption was added in pull request #1741.
+* Pull request #2170 has some discussion regarding how to navigate SGF
+  files that were parsed by the engine via GTP. From f6757568b5135e0e0997972e027cef18ea173fbf Mon Sep 17 00:00:00 2001 From: Akita Noek Date: Mon, 4 Feb 2019 01:33:40 -0700 Subject: [PATCH 32/45] Add several simple GTP commands. Added several simple GTP commands useful for building interfaces to LZ. Added the following GTP commands. last_move move_history The output of these commands is in line with that of the corresponding commands in GNU Go when such commands existed. Pull request #2170. 
--- src/GTP.cpp | 31 +++++++++++++++++++++++++++++++ src/GameState.cpp | 4 ++++ src/GameState.h | 1 + 3 files changed, 36 insertions(+) diff --git a/src/GTP.cpp b/src/GTP.cpp index 83f4238b0..7ecc4f343 100644 --- a/src/GTP.cpp +++ b/src/GTP.cpp @@ -386,6 +386,8 @@ const std::string GTP::s_commands[] = { "time_settings", "time_left", "fixed_handicap", + "last_move", + "move_history", "place_free_handicap", "set_free_handicap", "loadsgf", @@ -877,6 +879,35 @@ void GTP::execute(GameState & game, const std::string& xinput) { gtp_fail_printf(id, "Not a valid number of handicap stones"); } return; + } else if (command.find("last_move") == 0) { + auto last_move = game.get_last_move(); + if (last_move == FastBoard::NO_VERTEX) { + gtp_fail_printf(id, "no previous move known"); + return; + } + auto coordinate = game.move_to_text(last_move); + auto color = game.get_to_move() == FastBoard::WHITE ? "black" : "white"; + gtp_printf(id, "%s %s", color, coordinate.c_str()); + return; + } else if (command.find("move_history") == 0) { + if (game.get_movenum() == 0) { + gtp_printf_raw("= \n"); + } else { + gtp_printf_raw("= "); + } + auto game_history = game.get_game_history(); + // undone moves may still be present, so reverse the portion of the + // array we need and resize to trim it down for iteration. + std::reverse(game_history.begin(), + game_history.begin() + game.get_movenum() + 1); + game_history.resize(game.get_movenum()); + for (auto &state : game_history) { + auto coordinate = game.move_to_text(state->get_last_move()); + auto color = state->get_to_move() == FastBoard::WHITE ? 
"black" : "white"; + gtp_printf_raw("%s %s\n", color, coordinate.c_str()); + } + gtp_printf_raw("\n"); + return; } else if (command.find("place_free_handicap") == 0) { std::istringstream cmdstream(command); std::string tmp; diff --git a/src/GameState.cpp b/src/GameState.cpp index cfd01bb87..f2df5ef46 100644 --- a/src/GameState.cpp +++ b/src/GameState.cpp @@ -312,3 +312,7 @@ const FullBoard& GameState::get_past_board(int moves_ago) const { assert(m_movenum + 1 <= game_history.size()); return game_history[m_movenum - moves_ago]->board; } + +const std::vector>& GameState::get_game_history() const { + return game_history; +} diff --git a/src/GameState.h b/src/GameState.h index 050adf03e..3d368f10d 100644 --- a/src/GameState.h +++ b/src/GameState.h @@ -60,6 +60,7 @@ class GameState : public KoState { bool undo_move(); bool forward_move(); const FullBoard& get_past_board(int moves_ago) const; + const std::vector>& get_game_history() const; void play_move(int color, int vertex); void play_move(int vertex); From f174c43b3a7c52b4d6a2c95833839d3eab2e8db6 Mon Sep 17 00:00:00 2001 From: Gian-Carlo Pascutto Date: Mon, 4 Feb 2019 09:35:35 +0100 Subject: [PATCH 33/45] Minor style fixups. Minor fixups for pull request #2170. --- src/GTP.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/GTP.cpp b/src/GTP.cpp index 7ecc4f343..f08618da4 100644 --- a/src/GTP.cpp +++ b/src/GTP.cpp @@ -898,10 +898,10 @@ void GTP::execute(GameState & game, const std::string& xinput) { auto game_history = game.get_game_history(); // undone moves may still be present, so reverse the portion of the // array we need and resize to trim it down for iteration. 
- std::reverse(game_history.begin(), - game_history.begin() + game.get_movenum() + 1); + std::reverse(begin(game_history), + begin(game_history) + game.get_movenum() + 1); game_history.resize(game.get_movenum()); - for (auto &state : game_history) { + for (const auto &state : game_history) { auto coordinate = game.move_to_text(state->get_last_move()); auto color = state->get_to_move() == FastBoard::WHITE ? "black" : "white"; gtp_printf_raw("%s %s\n", color, coordinate.c_str()); From 901aaa9996b4e428bb74301a9d964bee7763e2a1 Mon Sep 17 00:00:00 2001 From: Gian-Carlo Pascutto Date: Mon, 4 Feb 2019 10:56:48 +0100 Subject: [PATCH 34/45] Remark about move assignment in style guideline. Emphasize use of emplace_back and move semantics. --- CONTRIBUTING.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 95cf49aea..c75873ad4 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -14,6 +14,7 @@ This means that: * Prefer "using" over typedefs. * Prefer uniform initialization. * Prefer default initializers for member variables. +* Prefer emplace_back and making use of move assignment. * Aim for const-correctness. Prefer passing non-trivial parameters by const reference. * Use header include guards, not #pragma once (pragma once is non-standard, has issues with detecting identical files, and is slower https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58770) * config.h is always the first file included. From 5d4da0bef87f39be4cb8376266524b22a13c884c Mon Sep 17 00:00:00 2001 From: Gian-Carlo Pascutto Date: Mon, 4 Feb 2019 12:31:44 +0100 Subject: [PATCH 35/45] Add lz-analyze minmoves tag. Add an lz-analyze tag to suggest the minimum amount of moves the engine should post info about (rather than only those it considers interesting, i.e. the ones with at least a visit). 
This allows some very flexible constructs: Getting a heatmap: lz-setoption name visits value 1 lz-analyze interval 1 minmoves 361 Forcing a move among the top policy moves only: lz-setoption name visits value 1 lz-analyze interval 1 minmoves 2 (store those moves, e.g. A1, B1) lz-setoption name visits value 0 lz-genmove_analyze b interval 1 allow b A1 1 allow b B1 1 --- src/GTP.cpp | 9 +++++++++ src/GTP.h | 2 ++ src/UCTSearch.cpp | 6 ++++-- src/tests/gtests.cpp | 9 +++++++++ 4 files changed, 24 insertions(+), 2 deletions(-) diff --git a/src/GTP.cpp b/src/GTP.cpp index f08618da4..2486da57f 100644 --- a/src/GTP.cpp +++ b/src/GTP.cpp @@ -230,6 +230,11 @@ AnalyzeTags::AnalyzeTags(std::istringstream& cmdstream, const GameState& game) { if (cmdstream.fail()) { return; } + } else if (tag == "minmoves") { + cmdstream >> m_min_moves; + if (cmdstream.fail()) { + return; + } } else { return; } @@ -256,6 +261,10 @@ int AnalyzeTags::who() const { return m_who; } +size_t AnalyzeTags::post_move_count() const { + return m_min_moves; +} + bool AnalyzeTags::is_to_avoid(int color, int vertex, size_t movenum) const { for (auto& move : m_moves_to_avoid) { if (color == move.color && vertex == move.vertex && movenum <= move.until_move) { diff --git a/src/GTP.h b/src/GTP.h index cbf2b6633..8b9c5ee8c 100644 --- a/src/GTP.h +++ b/src/GTP.h @@ -67,6 +67,7 @@ class AnalyzeTags { int interval_centis() const; int invalid() const; int who() const; + size_t post_move_count() const; bool is_to_avoid(int color, int vertex, size_t movenum) const; bool has_move_restrictions() const; @@ -75,6 +76,7 @@ class AnalyzeTags { std::vector m_moves_to_avoid, m_moves_to_allow; int m_interval_centis{0}; int m_who{FastBoard::INVAL}; + size_t m_min_moves{0}; }; extern bool cfg_gtp_mode; diff --git a/src/UCTSearch.cpp b/src/UCTSearch.cpp index b42776c13..6677b9920 100644 --- a/src/UCTSearch.cpp +++ b/src/UCTSearch.cpp @@ -302,8 +302,10 @@ void UCTSearch::output_analysis(FastState & state, UCTNode & parent) { const 
auto color = state.get_to_move(); for (const auto& node : parent.get_children()) { - // Only send variations with visits - if (!node->get_visits()) { + // Send only variations with visits, unless more moves were + // requested explicitly. + if (!node->get_visits() + && sortable_data.size() >= cfg_analyze_tags.post_move_count()) { continue; } std::string move = state.move_to_text(node->get_move()); diff --git a/src/tests/gtests.cpp b/src/tests/gtests.cpp index 95d9fe5e5..1d1cbbcb7 100644 --- a/src/tests/gtests.cpp +++ b/src/tests/gtests.cpp @@ -375,3 +375,12 @@ TEST_F(LeelaTest, AnalyzeParse) { test_analyze_cmd("b avoid b a1:t19,pass,resign 1", true, FastBoard::BLACK, 0, 363, FastBoard::BLACK, 0); } + +TEST_F(LeelaTest, AnalyzeParseMinmoves) { + gtp_execute("clear_board"); + gtp_execute("lz-setoption name pondering value false"); + gtp_execute("lz-setoption name playouts value 1"); + auto result = gtp_execute("lz-analyze b interval 1 minmoves 20"); + // Expect to see at least 20 move priors + expect_regex(result.first, "info.*?((prior\\s+\\d+\\s+).*?){20,}.*"); +} From 58460f7158025e108e1c1c14327c260bb7ccb5a4 Mon Sep 17 00:00:00 2001 From: Gian-Carlo Pascutto Date: Mon, 4 Feb 2019 12:51:52 +0100 Subject: [PATCH 36/45] Fix style, extra spaces in PV output. Adding the minmoves tag exposes a small bug in the PV output formatting. Avoid extra blank spaces. Small style fixups. --- src/UCTSearch.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/UCTSearch.cpp b/src/UCTSearch.cpp index 6677b9920..afd7c0902 100644 --- a/src/UCTSearch.cpp +++ b/src/UCTSearch.cpp @@ -276,10 +276,10 @@ void UCTSearch::dump_stats(FastState & state, UCTNode & parent) { // only one move searched the user could get an idea why. 
if (++movecount > 2 && !node->get_visits()) break; - std::string move = state.move_to_text(node->get_move()); - FastState tmpstate = state; + auto move = state.move_to_text(node->get_move()); + auto tmpstate = FastState{state}; tmpstate.play_move(node->get_move()); - std::string pv = move + " " + get_pv(tmpstate, *node); + auto pv = move + " " + get_pv(tmpstate, *node); myprintf("%4s -> %7d (V: %5.2f%%) (N: %5.2f%%) PV: %s\n", move.c_str(), @@ -308,10 +308,11 @@ void UCTSearch::output_analysis(FastState & state, UCTNode & parent) { && sortable_data.size() >= cfg_analyze_tags.post_move_count()) { continue; } - std::string move = state.move_to_text(node->get_move()); - FastState tmpstate = state; + auto move = state.move_to_text(node->get_move()); + auto tmpstate = FastState{state}; tmpstate.play_move(node->get_move()); - std::string pv = move + " " + get_pv(tmpstate, *node); + auto rest_of_pv = get_pv(tmpstate, *node); + auto pv = move + (rest_of_pv.empty() ? "" : " " + rest_of_pv); auto move_eval = node->get_visits() ? node->get_raw_eval(color) : 0.0f; auto policy = node->get_policy(); // Store data in array From 5795d083bd3c7eff8c8d71ce20b2f036e31abd01 Mon Sep 17 00:00:00 2001 From: Gian-Carlo Pascutto Date: Mon, 4 Feb 2019 13:52:15 +0100 Subject: [PATCH 37/45] Rework test regex for MSVC limits. Seems like the previous test regex is causing MSVC's regex engine to run out of stack space. 
--- src/tests/gtests.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/tests/gtests.cpp b/src/tests/gtests.cpp index 1d1cbbcb7..7b86a7c1c 100644 --- a/src/tests/gtests.cpp +++ b/src/tests/gtests.cpp @@ -380,7 +380,7 @@ TEST_F(LeelaTest, AnalyzeParseMinmoves) { gtp_execute("clear_board"); gtp_execute("lz-setoption name pondering value false"); gtp_execute("lz-setoption name playouts value 1"); - auto result = gtp_execute("lz-analyze b interval 1 minmoves 20"); - // Expect to see at least 20 move priors - expect_regex(result.first, "info.*?((prior\\s+\\d+\\s+).*?){20,}.*"); + auto result = gtp_execute("lz-analyze b interval 1 minmoves 5"); + // Expect to see at least 5 move priors + expect_regex(result.first, "info.*?(prior\\s+\\d+\\s+.*?){5,}.*"); } From f6d13271a52e4c23d209a4d59fb8a909969c0dd5 Mon Sep 17 00:00:00 2001 From: ncaq Date: Wed, 6 Feb 2019 17:24:20 +0900 Subject: [PATCH 38/45] .gitignore: Add build. leela-zero's default build directory is `build`. It is very annoying when using leela as a git submodule that the repository updates whenever it builds. Pull request #2199. --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index ba1184178..17a4dc647 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,4 @@ leelaz_opencl_tuning /build-autogtp-* /build-validation-* .vs/ +build/ From 31c36431b1588723667da3d3e91e0567d67a197e Mon Sep 17 00:00:00 2001 From: Junhee Yoo <33939814+ihavnoid@users.noreply.github.com> Date: Sat, 9 Feb 2019 23:29:44 +0900 Subject: [PATCH 39/45] Batched neural net evaluations Group evaluations and run them in parallel. Roughly 50% speedup on my setup, but there are a couple of points that is debatable. - Thread / batch sizing heuristics : This PR changes how the default threads / default batch sizes are picked. 
See Leela.cpp - Batch-forming heuristic : See OpenCLScheduler.cpp for the batch forming heuristic : the heuristic exists so that we can wait for the rest of the engine to create more NN evaluations so that we can run larger batches. We can't wait indefinitely since there are cases we enter 'serial' paths. Since heuristics are heuristics, these might need some tests on a larger variety of types of systems. Did make sure that winrate improves when running default vs. default command line `./leelaz -w (weight file)` on time parity. Pull request #2188. --- src/CPUPipe.h | 1 - src/GTP.cpp | 20 ++--- src/GTP.h | 4 +- src/Leela.cpp | 105 ++++++++++++++++++----- src/Network.cpp | 4 +- src/OpenCL.cpp | 13 +-- src/OpenCL.h | 4 +- src/OpenCLScheduler.cpp | 181 ++++++++++++++++++++++++++++++++++------ src/OpenCLScheduler.h | 40 +++++++-- src/SMP.cpp | 2 +- src/SMP.h | 3 +- src/UCTSearch.cpp | 13 ++- src/config.h | 5 -- 13 files changed, 308 insertions(+), 87 deletions(-) diff --git a/src/CPUPipe.h b/src/CPUPipe.h index 8b0730498..c750aff69 100644 --- a/src/CPUPipe.h +++ b/src/CPUPipe.h @@ -47,7 +47,6 @@ class CPUPipe : public ForwardPipe { unsigned int channels, unsigned int outputs, std::shared_ptr weights); - private: void winograd_transform_in(const std::vector& in, std::vector& V, diff --git a/src/GTP.cpp b/src/GTP.cpp index 2486da57f..cd018eee3 100644 --- a/src/GTP.cpp +++ b/src/GTP.cpp @@ -59,8 +59,8 @@ using namespace Utils; // Configuration flags bool cfg_gtp_mode; bool cfg_allow_pondering; -int cfg_num_threads; -int cfg_max_threads; +unsigned int cfg_num_threads; +unsigned int cfg_batch_size; int cfg_max_playouts; int cfg_max_visits; size_t cfg_max_memory; @@ -314,15 +314,12 @@ void GTP::initialize(std::unique_ptr&& net) { void GTP::setup_default_parameters() { cfg_gtp_mode = false; cfg_allow_pondering = true; - cfg_max_threads = std::max(1, std::min(SMP::get_num_cpus(), MAX_CPUS)); -#ifdef USE_OPENCL - // If we will be GPU limited, using many threads won't help much. 
- // Multi-GPU is a different story, but we will assume that those people - // who do those stuff will know what they are doing. - cfg_num_threads = std::min(2, cfg_max_threads); -#else - cfg_num_threads = cfg_max_threads; -#endif + + // we will re-calculate this on Leela.cpp + cfg_num_threads = 0; + // we will re-calculate this on Leela.cpp + cfg_batch_size = 0; + cfg_max_memory = UCTSearch::DEFAULT_MAX_MEMORY; cfg_max_playouts = UCTSearch::UNLIMITED_PLAYOUTS; cfg_max_visits = UCTSearch::UNLIMITED_PLAYOUTS; @@ -336,6 +333,7 @@ void GTP::setup_default_parameters() { cfg_gpus = { }; cfg_sgemm_exhaustive = false; cfg_tune_only = false; + #ifdef USE_HALF cfg_precision = precision_t::AUTO; #endif diff --git a/src/GTP.h b/src/GTP.h index 8b9c5ee8c..90bdd81d2 100644 --- a/src/GTP.h +++ b/src/GTP.h @@ -81,8 +81,8 @@ class AnalyzeTags { extern bool cfg_gtp_mode; extern bool cfg_allow_pondering; -extern int cfg_num_threads; -extern int cfg_max_threads; +extern unsigned int cfg_num_threads; +extern unsigned int cfg_batch_size; extern int cfg_max_playouts; extern int cfg_max_visits; extern size_t cfg_max_memory; diff --git a/src/Leela.cpp b/src/Leela.cpp index f95779478..a55e4c867 100644 --- a/src/Leela.cpp +++ b/src/Leela.cpp @@ -61,6 +61,74 @@ static void license_blurb() { PROGRAM_VERSION); } +static void calculate_thread_count_cpu(boost::program_options::variables_map & vm) { + // If we are CPU-based, there is no point using more than the number of CPUs/ + auto cfg_max_threads = std::min(SMP::get_num_cpus(), size_t{MAX_CPUS}); + + if (vm.count("threads")) { + auto num_threads = vm["threads"].as(); + if (num_threads > cfg_max_threads) { + myprintf("Clamping threads to maximum = %d\n", cfg_max_threads); + num_threads = cfg_max_threads; + } + cfg_num_threads = num_threads; + } else { + cfg_num_threads = cfg_max_threads; + } +} + +#ifdef USE_OPENCL +static void calculate_thread_count_gpu(boost::program_options::variables_map & vm) { + auto cfg_max_threads = size_t{MAX_CPUS}; + 
+ // Default thread count : GPU case + // 1) if no args are given, use batch size of 5 and thread count of (batch size) * (number of gpus) * 2 + // 2) if number of threads are given, use batch size of (thread count) / (number of gpus) / 2 + // 3) if number of batches are given, use thread count of (batch size) * (number of gpus) * 2 + auto gpu_count = cfg_gpus.size(); + if (gpu_count == 0) { + // size of zero if autodetect GPU : default to 1 + gpu_count = 1; + } + + if (vm.count("threads")) { + auto num_threads = vm["threads"].as(); + if (num_threads > cfg_max_threads) { + myprintf("Clamping threads to maximum = %d\n", cfg_max_threads); + num_threads = cfg_max_threads; + } + cfg_num_threads = num_threads; + + if (vm.count("batchsize")) { + cfg_batch_size = vm["batchsize"].as(); + } else { + cfg_batch_size = (cfg_num_threads + (gpu_count * 2) - 1) / (gpu_count * 2); + + // no idea why somebody wants to use threads less than the number of GPUs + // but should at least prevent crashing + if (cfg_batch_size == 0) { + cfg_batch_size = 1; + } + } + } else { + if (vm.count("batchsize")) { + cfg_batch_size = vm["batchsize"].as(); + } else { + cfg_batch_size = 5; + } + + cfg_num_threads = std::min(cfg_max_threads, cfg_batch_size * gpu_count * 2); + } + + if (cfg_num_threads < cfg_batch_size) { + printf("Number of threads = %d must be larger than batch size = %d\n", cfg_num_threads, cfg_batch_size); + exit(EXIT_FAILURE); + } + + +} +#endif + static void parse_commandline(int argc, char *argv[]) { namespace po = boost::program_options; // Declare the supported options. @@ -68,7 +136,7 @@ static void parse_commandline(int argc, char *argv[]) { gen_desc.add_options() ("help,h", "Show commandline options.") ("gtp,g", "Enable GTP mode.") - ("threads,t", po::value()->default_value(cfg_num_threads), + ("threads,t", po::value(), "Number of threads to use.") ("playouts,p", po::value(), "Weaken engine by limiting the number of playouts. 
" @@ -104,6 +172,7 @@ static void parse_commandline(int argc, char *argv[]) { "ID of the OpenCL device(s) to use (disables autodetection).") ("full-tuner", "Try harder to find an optimal OpenCL tuning.") ("tune-only", "Tune OpenCL only and then exit.") + ("batchsize", po::value(), "Max batch size. Default is the number of threads divided by (2 * number of OpenCL devices).") #ifdef USE_HALF ("precision", po::value(), "Floating-point precision (single/half/auto).\n" @@ -231,11 +300,6 @@ static void parse_commandline(int argc, char *argv[]) { #ifdef USE_OPENCL if (vm.count("gpu")) { cfg_gpus = vm["gpu"].as >(); - // if we use OpenCL, we probably need more threads for the max - // so that we can saturate the GPU. - cfg_max_threads *= cfg_gpus.size(); - // we can't exceed MAX_CPUS - cfg_max_threads = std::min(cfg_max_threads, MAX_CPUS); } if (vm.count("full-tuner")) { @@ -250,7 +314,6 @@ static void parse_commandline(int argc, char *argv[]) { if (vm.count("tune-only")) { cfg_tune_only = true; } - #ifdef USE_HALF if (vm.count("precision")) { auto precision = vm["precision"].as(); @@ -274,15 +337,19 @@ static void parse_commandline(int argc, char *argv[]) { } } #endif + if (vm.count("cpu-only")) { + cfg_cpu_only = true; + } +#else + cfg_cpu_only = true; #endif - if (!vm["threads"].defaulted()) { - auto num_threads = vm["threads"].as(); - if (num_threads > cfg_max_threads) { - myprintf("Clamping threads to maximum = %d\n", cfg_max_threads); - num_threads = cfg_max_threads; - } - cfg_num_threads = num_threads; + if (cfg_cpu_only) { + calculate_thread_count_cpu(vm); + } else { +#ifdef USE_OPENCL + calculate_thread_count_gpu(vm); +#endif } myprintf("Using %d thread(s).\n", cfg_num_threads); @@ -307,12 +374,6 @@ static void parse_commandline(int argc, char *argv[]) { cfg_dumbpass = true; } -#ifndef USE_CPU_ONLY - if (vm.count("cpu-only")) { - cfg_cpu_only = true; - } -#endif - if (vm.count("playouts")) { cfg_max_playouts = vm["playouts"].as(); if (!vm.count("noponder")) { @@ 
-391,9 +452,7 @@ static void parse_commandline(int argc, char *argv[]) { cfg_random_cnt = 0; cfg_rng_seed = 1; cfg_timemanage = TimeManagement::OFF; // Reliable number of playouts. - if (vm["threads"].defaulted()) { - cfg_num_threads = 1; - } + if (!vm.count("playouts") && !vm.count("visits")) { cfg_max_visits = 3200; // Default to self-play and match values. } diff --git a/src/Network.cpp b/src/Network.cpp index e9182bdf0..fd4bea544 100644 --- a/src/Network.cpp +++ b/src/Network.cpp @@ -107,7 +107,7 @@ float Network::benchmark_time(int centiseconds) { get_output(&state, Ensemble::RANDOM_SYMMETRY, -1, false, true, true); const Time start; - for (auto i = 0; i < cpus; i++) { + for (auto i = size_t{0}; i < cpus; i++) { tg.add_task([this, &runcount, start, centiseconds, state]() { while (true) { runcount++; @@ -134,7 +134,7 @@ void Network::benchmark(const GameState* const state, const int iterations) { ThreadGroup tg(thread_pool); std::atomic runcount{0}; - for (auto i = 0; i < cpus; i++) { + for (auto i = size_t{0}; i < cpus; i++) { tg.add_task([this, &runcount, iterations, state]() { while (runcount < iterations) { runcount++; diff --git a/src/OpenCL.cpp b/src/OpenCL.cpp index 2701e441c..d316df1e3 100644 --- a/src/OpenCL.cpp +++ b/src/OpenCL.cpp @@ -172,9 +172,9 @@ void OpenCL_Network::forward(const std::vector& input, const auto n_ceil = ceilMultiple(ceilMultiple(tiles, nwg), vwn); const auto alloc_inSize = - MAX_BATCH * NUM_INTERSECTIONS * max_channels * sizeof(net_t); + getOpenCL().m_batch_size * NUM_INTERSECTIONS * max_channels * sizeof(net_t); const auto alloc_vm_size = - MAX_BATCH * WINOGRAD_TILE * m_ceil * n_ceil * sizeof(net_t); + getOpenCL().m_batch_size * WINOGRAD_TILE * m_ceil * n_ceil * sizeof(net_t); auto v_zeros = std::vector(alloc_vm_size); @@ -194,10 +194,10 @@ void OpenCL_Network::forward(const std::vector& input, opencl_context.m_pinnedOutBuffer_pol = cl::Buffer( m_opencl.m_context, - CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR, MAX_BATCH * 
finalSize_pol); + CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR, getOpenCL().m_batch_size * finalSize_pol); opencl_context.m_pinnedOutBuffer_val = cl::Buffer( m_opencl.m_context, - CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR, MAX_BATCH * finalSize_val); + CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR, getOpenCL().m_batch_size * finalSize_val); opencl_context.m_buffers_allocated = true; } @@ -846,7 +846,8 @@ OpenCL::OpenCL(int gpu, bool silent) { } template -void OpenCL::initialize(const int channels) { +void OpenCL::initialize(const int channels, size_t batch_size) { + m_batch_size = batch_size; // Make program of the source code in the context try { m_program = cl::Program(m_context, @@ -866,7 +867,7 @@ void OpenCL::initialize(const int channels) { } auto sgemm_tuners = - t.load_sgemm_tuners(channels, WINOGRAD_P, channels, WINOGRAD_TILE); + t.load_sgemm_tuners(channels, batch_size * WINOGRAD_P, channels, WINOGRAD_TILE); // Some NVIDIA drivers are buggy and will fail to compile the rest of the // kernels after a tuning run. 
diff --git a/src/OpenCL.h b/src/OpenCL.h index 86249a426..99470f062 100644 --- a/src/OpenCL.h +++ b/src/OpenCL.h @@ -195,7 +195,8 @@ class OpenCL { friend class Tuner; public: OpenCL(int gpu, bool silent = false); - void initialize(const int channels); + + void initialize(const int channels, size_t batch_size = 1); void ensure_context_initialized(OpenCLContext & opencl_context); std::string get_device_name(); bool has_fp16_compute(); @@ -208,6 +209,7 @@ class OpenCL { void tune_sgemm(); void process_tuners(std::string tuners); + size_t m_batch_size = 1; cl::Program m_program; std::string m_cl_args; diff --git a/src/OpenCLScheduler.cpp b/src/OpenCLScheduler.cpp index 96c8b9349..85759063d 100644 --- a/src/OpenCLScheduler.cpp +++ b/src/OpenCLScheduler.cpp @@ -29,6 +29,7 @@ #include "config.h" #ifdef USE_OPENCL + #include "GTP.h" #include "Random.h" #include "Network.h" @@ -36,6 +37,7 @@ #include "OpenCLScheduler.h" using Utils::ceilMultiple; +using Utils::myprintf; class from_float{ public: @@ -109,18 +111,16 @@ OpenCLScheduler::OpenCLScheduler() { template void OpenCLScheduler::initialize(const int channels) { - // Launch the worker thread. - // Round_up(cfg_num_threads / gpus.size()) threads - // so that we only have enough contexts to achieve full parallelism. - const auto num_threads = (cfg_num_threads + m_opencl.size() - 1) / m_opencl.size(); - m_context_pool.resize(num_threads); + // Launch the worker threads. Minimum 1 worker per GPU, but use enough threads + // so that we can at least concurrently schedule something to the GPU. 
+ auto num_worker_threads = cfg_num_threads / cfg_batch_size / (m_opencl.size() + 1) + 1; auto gnum = 0; for (auto & opencl : m_opencl) { - opencl->initialize(channels); + opencl->initialize(channels, cfg_batch_size); - for (auto i = size_t{0}; i < num_threads; i++) { - m_context_pool[i].emplace_back( - std::make_shared(gnum)); + for (auto i = unsigned{0}; i < num_worker_threads; i++) { + auto t = std::thread(&OpenCLScheduler::batch_worker, this, gnum); + m_worker_threads.push_back(std::move(t)); } gnum++; } @@ -132,6 +132,18 @@ void OpenCLScheduler::initialize(const int channels) { } } +template +OpenCLScheduler::~OpenCLScheduler() { + { + std::unique_lock lk(m_mutex); + m_running = false; + } + m_cv.notify_all(); + for (auto & x : m_worker_threads) { + x.join(); + } +} + template bool OpenCLScheduler::needs_autodetect() { for (auto& opencl : m_opencl) { @@ -254,29 +266,146 @@ template void OpenCLScheduler::forward(const std::vector& input, std::vector& output_pol, std::vector& output_val) { - std::shared_ptr ctx; - auto queue_num = size_t{0}; + auto entry = std::make_shared(input, output_pol, output_val); + std::unique_lock lk(entry->mutex); { - LOCK(m_context_pool_mutex, lock); - while (queue_num < m_context_pool.size()) { - if (!m_context_pool[queue_num].empty()) { - ctx = std::move(m_context_pool[queue_num].front()); - m_context_pool[queue_num].pop_front(); + std::unique_lock lk(m_mutex); + m_forward_queue.push_back(entry); + + if (m_single_eval_in_progress.load()) { + m_waittime += 2; + } + } + m_cv.notify_one(); + entry->cv.wait(lk); +} + +#ifndef NDEBUG +struct batch_stats_t batch_stats; +#endif + +template +void OpenCLScheduler::batch_worker(const size_t gnum) { + constexpr auto in_size = Network::INPUT_CHANNELS * BOARD_SIZE * BOARD_SIZE; + constexpr auto out_pol_size = Network::OUTPUTS_POLICY * BOARD_SIZE * BOARD_SIZE; + constexpr auto out_val_size = Network::OUTPUTS_VALUE * BOARD_SIZE * BOARD_SIZE; + + OpenCLContext context; + + // batch scheduling 
heuristic. + // Returns the batch picked up from the queue (m_forward_queue) + // 1) Wait for m_waittime milliseconds for full batch + // 2) if we don't have a full batch then just do a single eval + // + // The purpose of m_waittime is to prevent the system from deadlocking + // because we were waiting for a job too long, while the job is never + // going to come due to a control dependency (e.g., evals stuck on a + // critical path). To do so: + // + // 1) if we couldn't form a batch after waiting m_waittime ms, it means + // that we hit the critical path and should do scalar evals. + // Wait 1ms shorter next time. + // + // 2) if we picked up a single eval, but were getting additional evals + // while that single eval was being processed, it means that we made + // the wrong decision. Wait 2ms longer next time. + + auto pickup_task = [this, gnum] () { + std::list> inputs; + size_t count = 0; + + std::unique_lock lk(m_mutex); + while (true) { + if (!m_running) return inputs; + + count = m_forward_queue.size(); + if (count >= cfg_batch_size) { + count = cfg_batch_size; break; } - queue_num++; + + bool timeout = !m_cv.wait_for( + lk, + std::chrono::milliseconds(m_waittime), + [this] () { + return !m_running || m_forward_queue.size() >= cfg_batch_size; + } + ); + + if (!m_forward_queue.empty()) { + if (timeout && m_single_eval_in_progress.exchange(true) == false) { + // Waited long enough but couldn't form a batch. + // Check if there is any other single eval in progress, and if not, + // do one from this thread. + if (m_waittime > 1) { + m_waittime--; + } + count = 1; + break; + } + } } - // If this failed, it means we ran out of contexts - // which should be more than or equal to the number of threads. - assert(ctx != nullptr); - } + // Move 'count' evals from shared queue to local list. 
+ auto end = begin(m_forward_queue); + std::advance(end, count); + std::move(begin(m_forward_queue), end, std::back_inserter(inputs)); + m_forward_queue.erase(begin(m_forward_queue), end); - m_networks[ctx->net_index]->forward(input, output_pol, output_val, - ctx->context); + return inputs; + }; - { - LOCK(m_context_pool_mutex, lock); - m_context_pool[queue_num].push_back(std::move(ctx)); + auto batch_input = std::vector(); + auto batch_output_pol = std::vector(); + auto batch_output_val = std::vector(); + + while (true) { + auto inputs = pickup_task(); + auto count = inputs.size(); + + if (!m_running) { + return; + } + +#ifndef NDEBUG + if (count == 1) { + batch_stats.single_evals++; + } else { + batch_stats.batch_evals++; + } +#endif + + // prepare input for forward() call + batch_input.resize(in_size * count); + batch_output_pol.resize(out_pol_size * count); + batch_output_val.resize(out_val_size * count); + + auto index = size_t{0}; + for (auto & x : inputs) { + std::unique_lock lk(x->mutex); + std::copy(begin(x->in), end(x->in), begin(batch_input) + in_size * index); + index++; + } + + // run the NN evaluation + m_networks[gnum]->forward( + batch_input, batch_output_pol, batch_output_val, context, count); + + // Get output and copy back + index = 0; + for (auto & x : inputs) { + std::copy(begin(batch_output_pol) + out_pol_size * index, + begin(batch_output_pol) + out_pol_size * (index + 1), + begin(x->out_p)); + std::copy(begin(batch_output_val) + out_val_size * index, + begin(batch_output_val) + out_val_size * (index + 1), + begin(x->out_v)); + x->cv.notify_all(); + index++; + } + + if (count == 1) { + m_single_eval_in_progress = false; + } } } diff --git a/src/OpenCLScheduler.h b/src/OpenCLScheduler.h index 329f25375..06fae8da7 100644 --- a/src/OpenCLScheduler.h +++ b/src/OpenCLScheduler.h @@ -33,23 +33,40 @@ #include #include +#include #include "SMP.h" #include "ForwardPipe.h" #include "OpenCL.h" #include "ThreadPool.h" +#ifndef NDEBUG +struct batch_stats_t 
{ + std::atomic single_evals{0}; + std::atomic batch_evals{0}; +}; +extern batch_stats_t batch_stats; +#endif template class OpenCLScheduler : public ForwardPipe { - class ContextPoolEntry { + class ForwardQueueEntry { public: - size_t net_index; - OpenCLContext context; - ContextPoolEntry(size_t index) : net_index(index) {} + std::mutex mutex; + std::condition_variable cv; + const std::vector& in; + std::vector& out_p; + std::vector& out_v; + ForwardQueueEntry(const std::vector& input, + std::vector& output_pol, + std::vector& output_val) + : in(input), out_p(output_pol), out_v(output_val) + {} }; public: + virtual ~OpenCLScheduler(); OpenCLScheduler(); + virtual void initialize(const int channels); virtual void forward(const std::vector& input, std::vector& output_pol, @@ -60,14 +77,23 @@ class OpenCLScheduler : public ForwardPipe { unsigned int outputs, std::shared_ptr weights); private: + bool m_running = true; std::vector>> m_networks; std::vector>> m_opencl; - using ContextPoolQueue = std::list>; - std::vector m_context_pool; + std::mutex m_mutex; + std::condition_variable m_cv; + + // start with 10 milliseconds : lock protected + int m_waittime{10}; + + // set to true when single (non-batch) eval is in progress + std::atomic m_single_eval_in_progress{false}; - SMP::Mutex m_context_pool_mutex; + std::list> m_forward_queue; + std::list m_worker_threads; + void batch_worker(const size_t gnum); void push_input_convolution(unsigned int filter_size, unsigned int channels, unsigned int outputs, diff --git a/src/SMP.cpp b/src/SMP.cpp index a42f09769..a43b90129 100644 --- a/src/SMP.cpp +++ b/src/SMP.cpp @@ -73,6 +73,6 @@ SMP::Lock::~Lock() { } } -int SMP::get_num_cpus() { +size_t SMP::get_num_cpus() { return std::thread::hardware_concurrency(); } diff --git a/src/SMP.h b/src/SMP.h index 6c110eb28..bf2f48fc1 100644 --- a/src/SMP.h +++ b/src/SMP.h @@ -32,10 +32,11 @@ #include "config.h" +#include #include namespace SMP { - int get_num_cpus(); + size_t get_num_cpus(); 
class Mutex { public: diff --git a/src/UCTSearch.cpp b/src/UCTSearch.cpp index afd7c0902..ae96a5322 100644 --- a/src/UCTSearch.cpp +++ b/src/UCTSearch.cpp @@ -48,6 +48,7 @@ #include "Timing.h" #include "Training.h" #include "Utils.h" +#include "OpenCLScheduler.h" using namespace Utils; @@ -794,6 +795,16 @@ int UCTSearch::think(int color, passflag_t passflag) { m_nodes.load(), m_playouts.load(), (m_playouts * 100.0) / (elapsed_centis+1)); + +#ifdef USE_OPENCL +#ifndef NDEBUG + myprintf("batch stats: %d %d\n", + batch_stats.single_evals.load(), + batch_stats.batch_evals.load() + ); +#endif +#endif + int bestmove = get_best_move(passflag); // Save the explanation. @@ -827,7 +838,7 @@ void UCTSearch::ponder() { m_run = true; ThreadGroup tg(thread_pool); - for (int i = 1; i < cfg_num_threads; i++) { + for (auto i = size_t{1}; i < cfg_num_threads; i++) { tg.add_task(UCTWorker(m_rootstate, this, m_root.get())); } Time start; diff --git a/src/config.h b/src/config.h index 34af48d89..9270063d1 100644 --- a/src/config.h +++ b/src/config.h @@ -101,11 +101,6 @@ static constexpr auto POTENTIAL_MOVES = NUM_INTERSECTIONS + 1; // including pass #endif -/* Maximum supported batch size for OpenCL. - */ -static constexpr auto MAX_BATCH = 1; -static_assert(MAX_BATCH == 1, "MAX_BATCH != 1 not implemented"); - /* * USE_TUNER: Expose some extra command line parameters that allow tuning the * search algorithm. From d8ea34d06048e4d4e5da33b2d571ba75e0e65cd3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Forst=C3=A9n?= Date: Sun, 10 Feb 2019 16:20:27 +0200 Subject: [PATCH 40/45] Autogtp: Tune for batchsize 1 Self-play games specify `-t 1` for playing which implies batch size of 1, but tuning was done for default settings since number of threads was not specified. 
Pull request #2206 --- autogtp/Management.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autogtp/Management.cpp b/autogtp/Management.cpp index f9c3874cb..bc9574347 100644 --- a/autogtp/Management.cpp +++ b/autogtp/Management.cpp @@ -106,7 +106,7 @@ void Management::giveAssignments() { QTextStream(stdout) << "Starting tuning process, please wait..." << endl; Order tuneOrder = getWork(true); - QString tuneCmdLine("./leelaz --tune-only -w networks/"); + QString tuneCmdLine("./leelaz -t 1 --tune-only -w networks/"); tuneCmdLine.append(tuneOrder.parameters()["network"] + ".gz"); if (m_gpusList.isEmpty()) { runTuningProcess(tuneCmdLine); From efcec18e12b5dbf38a278a5a3dfb2f19ea72dfd1 Mon Sep 17 00:00:00 2001 From: Seth Troisi Date: Wed, 13 Feb 2019 02:40:19 -0800 Subject: [PATCH 41/45] Tweak conversion script for ELF v2. Small tweak to conversion script for ELF v2 weights. Pull request #2213. --- training/elf/elf_convert.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/training/elf/elf_convert.py b/training/elf/elf_convert.py index cf8412ea3..89d65a442 100755 --- a/training/elf/elf_convert.py +++ b/training/elf/elf_convert.py @@ -41,7 +41,10 @@ def write_block(f, b): with open('elf_converted_weights.txt', 'w') as f: # version 2 means value head is for black, not for side to move f.write('2\n') - b = convert_block(state, 'init_conv') + if 'init_conv.0.weight' in state: + b = convert_block(state, 'init_conv') + else: + b = convert_block(state, 'init_conv.module') # Permutate input planes p = [0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15, 16, 17] From 6aaa5bba52b2842e8e23ea38a4380ef3543fb209 Mon Sep 17 00:00:00 2001 From: Jonathan Roy Date: Sat, 16 Feb 2019 23:34:51 -0500 Subject: [PATCH 42/45] Update README.md. Update links to leela-zero instead of gcp. Update badge and link to the new AppVeyor project under leela-zero instead of gcp ownership. 
--- README.md | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 7ae32e0c2..2cf5c9c82 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,5 @@ -[![Linux Build Status](https://travis-ci.org/gcp/leela-zero.svg?branch=next)](https://travis-ci.org/gcp/leela-zero) -[![Windows Build Status](https://ci.appveyor.com/api/projects/status/pf1hcgly8f1a8iu0/branch/next?svg=true)](https://ci.appveyor.com/project/gcp/leela-zero/branch/next) - +[![Linux Build Status](https://travis-ci.org/leela-zero/leela-zero.svg?branch=next)](https://travis-ci.org/leela-zero/leela-zero) +[![Windows Build Status](https://ci.appveyor.com/api/projects/status/dcvp31x1e0yavrtf/branch/next?svg=true)](https://ci.appveyor.com/project/gcp/leela-zero-8arv1/branch/next) # What @@ -43,7 +42,7 @@ the distributed effort. But you can still play, especially if you are patient. ### Windows -Head to the Github releases page at https://github.com/gcp/leela-zero/releases, +Head to the Github releases page at https://github.com/leela-zero/leela-zero/releases, download the latest release, unzip, and launch autogtp.exe. It will connect to the server automatically and do its work in the background, uploading results after each game. You can just close the autogtp window to stop it. @@ -70,7 +69,7 @@ There are community maintained instructions available here: Download the best known network weights file from [here](https://zero.sjeng.org/best-network), or, if you prefer a more human style, a (weaker) network trained from human games [here](https://sjeng.org/zero/best_v1.txt.zip). -If you are on Windows, download an official release from [here](https://github.com/gcp/leela-zero/releases) and head to the [Usage](#usage-for-playing-or-analyzing-games) +If you are on Windows, download an official release from [here](https://github.com/leela-zero/leela-zero/releases) and head to the [Usage](#usage-for-playing-or-analyzing-games) section of this README. 
If you are on Unix or macOS, you have to compile the program yourself. Follow @@ -101,7 +100,7 @@ by adding -DUSE_CPU_ONLY=1 to the cmake command line. sudo apt install clinfo && clinfo # Clone github repo - git clone https://github.com/gcp/leela-zero + git clone https://github.com/leela-zero/leela-zero cd leela-zero git submodule update --init --recursive @@ -121,7 +120,7 @@ by adding -DUSE_CPU_ONLY=1 to the cmake command line. ## Example of compiling - macOS # Clone github repo - git clone https://github.com/gcp/leela-zero + git clone https://github.com/leela-zero/leela-zero cd leela-zero git submodule update --init --recursive @@ -141,7 +140,7 @@ by adding -DUSE_CPU_ONLY=1 to the cmake command line. ## Example of compiling - Windows # Clone github repo - git clone https://github.com/gcp/leela-zero + git clone https://github.com/leela-zero/leela-zero cd leela-zero git submodule update --init --recursive From 2c394ab402ac601c2c18c5fbfda3c9ed0e5bc650 Mon Sep 17 00:00:00 2001 From: TFiFiE Date: Tue, 19 Feb 2019 19:22:26 +0100 Subject: [PATCH 43/45] Remove unused lambda capture. Pull request #2231. --- src/OpenCLScheduler.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/OpenCLScheduler.cpp b/src/OpenCLScheduler.cpp index 85759063d..28f346c0d 100644 --- a/src/OpenCLScheduler.cpp +++ b/src/OpenCLScheduler.cpp @@ -310,7 +310,7 @@ void OpenCLScheduler::batch_worker(const size_t gnum) { // while that single eval was being processed, it means that we made // the wrong decision. Wait 2ms longer next time. - auto pickup_task = [this, gnum] () { + auto pickup_task = [this] () { std::list> inputs; size_t count = 0; From d6db69f6848318b31d6a64c4ea78cb5068bc7375 Mon Sep 17 00:00:00 2001 From: TFiFiE Date: Tue, 19 Feb 2019 20:00:13 +0100 Subject: [PATCH 44/45] README.md: link to mentioned pull requests. Pull request #2229. 
--- CONTRIBUTING.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index c75873ad4..778ead8f6 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -31,7 +31,7 @@ This means that: * The code generally avoids any pointer passing and allows non-const references for parameters. Still, for new code it should be preferred to a) put input parameters first b) use return values over output parameters. * Function arguments that wrap are aligned. * Member variables in a class have an m_ prefix and are private. Members of POD structs don't and aren't. -* Constants and enum values are ALLCAPS. +* Constants and enum values are ALL_CAPS. * Variables are lowercase. * Function names are underscore_case. * Classes are CamelCase. @@ -101,10 +101,10 @@ It makes sense to be thoughtful here, consider the responsibilities of both GUI Experience and previous discussions can help understanding: -* lz-analyze "avoid" and "allow" were added in pull request #1949. -* lz-analyze got a side-to-move option in pull request #1872 and #1642. -* lz-analyze got a "prior" tag in pull request #1836. -* lz-analyze was added in pull request #1388. -* lz-setoption was added in pull request #1741. -* Pull request #2170 has some discussion regarding how to navigate SGF +* lz-analyze "avoid" and "allow" were added in pull request [#1949](https://github.com/leela-zero/leela-zero/pull/1949). +* lz-analyze got a side-to-move option in pull request [#1872](https://github.com/leela-zero/leela-zero/pull/1872) and [#1642](https://github.com/leela-zero/leela-zero/pull/1642). +* lz-analyze got a "prior" tag in pull request [#1836](https://github.com/leela-zero/leela-zero/pull/1836). +* lz-analyze was added in pull request [#1388](https://github.com/leela-zero/leela-zero/pull/1388). +* lz-setoption was added in pull request [#1741](https://github.com/leela-zero/leela-zero/pull/1741). 
+* Pull request [#2170](https://github.com/leela-zero/leela-zero/pull/2170) has some discussion regarding how to navigate SGF files that were parsed by the engine via GTP. From dab65c8d6e0908f102b62491f783672e8a2be339 Mon Sep 17 00:00:00 2001 From: TFiFiE Date: Tue, 19 Feb 2019 20:04:20 +0100 Subject: [PATCH 45/45] Minor cleanup involving Network::get_output. Pull request #2228. --- src/GTP.cpp | 3 +-- src/Network.cpp | 1 + src/Training.cpp | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/GTP.cpp b/src/GTP.cpp index cd018eee3..2bdd8613d 100644 --- a/src/GTP.cpp +++ b/src/GTP.cpp @@ -858,8 +858,7 @@ void GTP::execute(GameState & game, const std::string& xinput) { } } else if (symmetry == "average" || symmetry == "avg") { vec = s_network->get_output( - &game, Network::Ensemble::AVERAGE, - Network::NUM_SYMMETRIES, false); + &game, Network::Ensemble::AVERAGE, -1, false); } else { vec = s_network->get_output( &game, Network::Ensemble::DIRECT, std::stoi(symmetry), false); diff --git a/src/Network.cpp b/src/Network.cpp index fd4bea544..d4bc36f7a 100644 --- a/src/Network.cpp +++ b/src/Network.cpp @@ -743,6 +743,7 @@ Network::Netresult Network::get_output( assert(symmetry >= 0 && symmetry < NUM_SYMMETRIES); result = get_output_internal(state, symmetry); } else if (ensemble == AVERAGE) { + assert(symmetry == -1); for (auto sym = 0; sym < NUM_SYMMETRIES; ++sym) { auto tmpresult = get_output_internal(state, sym); result.winrate += diff --git a/src/Training.cpp b/src/Training.cpp index 9228ae6e4..c714b6227 100644 --- a/src/Training.cpp +++ b/src/Training.cpp @@ -169,8 +169,8 @@ void Training::record(Network & network, GameState& state, UCTNode& root) { step.to_move = state.board.get_to_move(); step.planes = get_planes(&state); - auto result = - network.get_output(&state, Network::Ensemble::DIRECT, 0); + const auto result = network.get_output( + &state, Network::Ensemble::DIRECT, Network::IDENTITY_SYMMETRY); step.net_winrate = result.winrate; const 
auto& best_node = root.get_best_root_child(step.to_move);