Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tree search enhancements in low winrate situations #17

Open
wants to merge 30 commits into
base: patch-4
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
c822e5e
AutoGTP: send leelaz version to server.
marcocalignano May 16, 2018
8751123
Multi GPU: fix split and variable placement.
godmoves May 17, 2018
0300531
Mutex optimization.
OmnipotentEntity May 17, 2018
8daa7e7
Update leela-zero.vcxproj for VS2015.
alreadydone May 18, 2018
792c0c7
Update Leela.cpp
alreadydone May 23, 2018
1fe15a5
Update GTP.h
alreadydone May 23, 2018
89b4d20
Update GTP.cpp
alreadydone May 23, 2018
5d3de3d
Update UCTSearch.h
alreadydone May 23, 2018
69b5419
Update UCTSearch.cpp
alreadydone May 23, 2018
194744b
Update UCTNode.cpp
alreadydone May 23, 2018
bbec7d8
first precision fix
alreadydone May 23, 2018
1362c8a
typo
alreadydone May 23, 2018
a8f2652
Update Leela.cpp
alreadydone May 23, 2018
3edc973
horrible typo
alreadydone May 23, 2018
5d0a017
Merge pull request #18 from gcp/next
alreadydone May 23, 2018
397cb85
bug fix
alreadydone May 23, 2018
732b204
Update Leela.cpp
alreadydone May 23, 2018
8adcafc
Update UCTNode.cpp
alreadydone May 24, 2018
ead8040
fix division by zero with multi threads
alreadydone May 24, 2018
e1b7968
Update Leela.cpp
alreadydone May 25, 2018
a1c0839
Update Leela.cpp
alreadydone May 25, 2018
134ab2e
Update GTP.cpp
alreadydone May 25, 2018
00fb0a3
Update UCTSearch.h
alreadydone May 25, 2018
bb3e009
Update UCTSearch.cpp
alreadydone May 25, 2018
097a99c
Update UCTSearch.h
alreadydone May 25, 2018
18638f7
Update GTP.cpp
alreadydone May 25, 2018
71eaa2f
Update Leela.cpp
alreadydone May 25, 2018
41c1306
Update UCTNode.cpp
alreadydone Jun 7, 2018
44788d8
Update UCTNodePointer.h
alreadydone Jun 7, 2018
7242bce
Update UCTNodePointer.cpp
alreadydone Jun 7, 2018
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions autogtp/Management.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,16 @@ void Management::runTuningProcess(const QString &tuneCmdLine) {
tuneProcess.waitForStarted(-1);
while (tuneProcess.state() == QProcess::Running) {
tuneProcess.waitForReadyRead(1000);
QByteArray text = tuneProcess.readAllStandardOutput();
int version_start = text.indexOf("Leela Zero ") + 11;
if (version_start > 10) {
int version_end = text.indexOf(" ", version_start);
m_leelaversion = QString(text.mid(version_start, version_end - version_start));
}
QTextStream(stdout) << text;
QTextStream(stdout) << tuneProcess.readAllStandardError();
}
QTextStream(stdout) << "Found Leela Version : " << m_leelaversion << endl;
tuneProcess.waitForFinished(-1);
}

Expand Down Expand Up @@ -316,6 +324,8 @@ Order Management::getWorkInternal(bool tuning) {
prog_cmdline.append("0");
} else {
prog_cmdline.append(QString::number(AUTOGTP_VERSION));
if (!m_leelaversion.isEmpty())
prog_cmdline.append("/"+m_leelaversion);
}
QProcess curl;
curl.start(prog_cmdline);
Expand Down
1 change: 1 addition & 0 deletions autogtp/Management.h
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ public slots:
int m_threadsLeft;
bool m_delNetworks;
QLockFile *m_lockFile;
QString m_leelaversion;

Order getWorkInternal(bool tuning);
Order getWork(bool tuning = false);
Expand Down
4 changes: 3 additions & 1 deletion msvc/VS2015/leela-zero.vcxproj
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
<?xml version="1.0" encoding="utf-8"?>
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|x64">
Expand Down Expand Up @@ -93,6 +93,7 @@
<ClCompile Include="..\..\src\Training.cpp" />
<ClCompile Include="..\..\src\Tuner.cpp" />
<ClCompile Include="..\..\src\UCTNode.cpp" />
<ClCompile Include="..\..\src\UCTNodePointer.cpp" />
<ClCompile Include="..\..\src\UCTNodeRoot.cpp" />
<ClCompile Include="..\..\src\UCTSearch.cpp" />
<ClCompile Include="..\..\src\Utils.cpp" />
Expand Down Expand Up @@ -121,6 +122,7 @@
<ClInclude Include="..\..\src\Training.h" />
<ClInclude Include="..\..\src\Tuner.h" />
<ClInclude Include="..\..\src\UCTNode.h" />
<ClInclude Include="..\..\src\UCTNodePointer.h" />
<ClInclude Include="..\..\src\UCTSearch.h" />
<ClInclude Include="..\..\src\Utils.h" />
<ClInclude Include="..\..\src\Zobrist.h" />
Expand Down
8 changes: 8 additions & 0 deletions src/GTP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,10 @@ std::vector<int> cfg_gpus;
bool cfg_sgemm_exhaustive;
bool cfg_tune_only;
#endif
int cfg_puct_factor;
float cfg_backup_pct;
int cfg_backup_type;
bool cfg_pseudo_backup;
float cfg_puct;
float cfg_softmax_temp;
float cfg_fpu_reduction;
Expand Down Expand Up @@ -95,6 +99,10 @@ void GTP::setup_default_parameters() {
cfg_sgemm_exhaustive = false;
cfg_tune_only = false;
#endif
cfg_puct_factor = 1;
cfg_backup_pct = 50.0;
cfg_backup_type = 3;
cfg_pseudo_backup = true;
cfg_puct = 0.8f;
cfg_softmax_temp = 1.0f;
cfg_fpu_reduction = 0.25f;
Expand Down
2 changes: 2 additions & 0 deletions src/GTP.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ extern std::vector<int> cfg_gpus;
extern bool cfg_sgemm_exhaustive;
extern bool cfg_tune_only;
#endif
extern int cfg_puct_factor;
extern float cfg_backup_pct;
extern float cfg_puct;
extern float cfg_softmax_temp;
extern float cfg_fpu_reduction;
Expand Down
40 changes: 38 additions & 2 deletions src/Leela.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,22 @@ static void parse_commandline(int argc, char *argv[]) {
("noponder", "Disable thinking on opponent's time.")
("benchmark", "Test network and exit. Default args:\n-v3200 --noponder "
"-m0 -t1 -s1.")

("puct-factor", po::value<int>()->default_value(cfg_puct_factor),
"0: original (=1), 1: linear (=winrate*2, default), 2: quadratic (=winrate(1-winrate)*4).")
("backup-pct", po::value<float>()->default_value(cfg_backup_pct),
"Update (backup) Q-values (winrates) of white's moves that are ancestors of the new leaf node "
"with a probability determined by winrate at root node and this parameter.\n"
"At most 100, defaulted to 50.\n"
"The lower the value, the weaker you assume white to be.")
("backup-type", po::value<int>()->default_value(cfg_backup_type),
"0: throw a dice to go up a generation,\n"
"1: always update, 2: never update,\n"
"3: throw dice once for each simulation (default),\n"
"4: throw dice once for each ancestor,\n"
"5: update the foremost ancestors only.")
("pseudo-backup", po::value<std::string>()->default_value("on"),
"[on|off] Whether to increment visit count when value is not actually updated.")
;
#ifdef USE_OPENCL
po::options_description gpu_desc("GPU options");
Expand All @@ -102,13 +118,13 @@ static void parse_commandline(int argc, char *argv[]) {
po::value<float>()->default_value(cfg_random_temp),
"Temperature to use for random move selection.")
;
#ifdef USE_TUNER
po::options_description tuner_desc("Tuning options");
tuner_desc.add_options()
("puct", po::value<float>())
("softmax_temp", po::value<float>())
("fpu_reduction", po::value<float>())
;
#ifdef USE_TUNER
#endif
// These won't be shown, we use them to catch incorrect usage of the
// command line.
Expand Down Expand Up @@ -167,7 +183,26 @@ static void parse_commandline(int argc, char *argv[]) {
cfg_quiet = true; // Set this early to avoid unnecessary output.
}

#ifdef USE_TUNER
if (vm.count("puct-factor")) {
cfg_puct_factor = vm["puct-factor"].as<int>();
}
if (vm.count("backup-pct")) {
cfg_backup_pct = vm["backup-pct"].as<float>();
if (cfg_backup_pct > 100.0) {
cfg_backup_pct = 50.0;
myprintf("Invalid backup percentage. Falling back to 50.0.\n");
}
}
if (vm.count("backup-type")) {
cfg_backup_type = vm["backup-type"].as<int>();
}
if (vm.count("pseudo-backup")) {
auto pb = vm["pseudo-backup"].as<std::string>();
if (pb == "off") {
cfg_pseudo_backup = false;
}
}

if (vm.count("puct")) {
cfg_puct = vm["puct"].as<float>();
}
Expand All @@ -177,6 +212,7 @@ static void parse_commandline(int argc, char *argv[]) {
if (vm.count("fpu_reduction")) {
cfg_fpu_reduction = vm["fpu_reduction"].as<float>();
}
#ifdef USE_TUNER
#endif

if (vm.count("logfile")) {
Expand Down
4 changes: 2 additions & 2 deletions src/Network.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -970,8 +970,8 @@ Network::Netresult Network::get_scored_moves_internal(
const auto winrate_out =
innerproduct<256, 1, false>(winrate_data, ip2_val_w, ip2_val_b);

// Sigmoid
const auto winrate_sig = (1.0f + std::tanh(winrate_out[0])) / 2.0f;
// Sigmoid: tanh normalized to take value in (0,1)
const auto winrate_sig = 1.0f / (1.0f + std::exp(-2.0f * winrate_out[0]));

Netresult result;

Expand Down
7 changes: 6 additions & 1 deletion src/SMP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,12 @@ SMP::Lock::Lock(Mutex & m) {

// Acquire the flag stored in m_mutex->m_lock on behalf of this Lock object,
// busy-waiting until it becomes available, then record ownership in
// m_owns_lock. Must not be called while this Lock already owns the flag
// (checked by the assert).
void SMP::Lock::lock() {
    assert(!m_owns_lock);
    // The flag must be taken exactly once. A second exchange(true) issued
    // after a successful acquisition would always observe `true` (our own
    // hold) and spin forever, so there is a single acquisition loop here.
    //
    // Test and Test-and-Set reduces memory contention
    // However, just trying to Test-and-Set first improves performance in almost
    // all cases
    while (m_mutex->m_lock.exchange(true, std::memory_order_acquire)) {
        // Wait on a plain load until the flag reads false, then retry the
        // exchange above.
        while (m_mutex->m_lock.load(std::memory_order_relaxed)) {
        }
    }
    m_owns_lock = true;
}

Expand Down
23 changes: 20 additions & 3 deletions src/UCTNode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ bool UCTNode::create_children(std::atomic<int>& nodecount,
if (state.board.white_to_move()) {
m_net_eval = 1.0f - m_net_eval;
}
update(m_net_eval);
eval = m_net_eval;

std::vector<Network::ScoreVertexPair> nodelist;
Expand Down Expand Up @@ -255,11 +256,13 @@ UCTNode* UCTNode::uct_select_child(int color, bool is_root) {
// Count parentvisits manually to avoid issues with transpositions.
auto total_visited_policy = 0.0f;
auto parentvisits = size_t{0};
auto parent_total_blackeval = get_pure_eval(FastBoard::BLACK);
for (const auto& child : m_children) {
if (child.valid()) {
parentvisits += child.get_visits();
if (child.get_visits() > 0) {
total_visited_policy += child.get_score();
parent_total_blackeval += child.get_blackevals();
}
}
}
Expand All @@ -270,9 +273,18 @@ UCTNode* UCTNode::uct_select_child(int color, bool is_root) {
// Do not do this if we have introduced noise at this node exactly
// to explore more.

auto pure_eval = get_pure_eval(color);
parentvisits++;
auto pure_eval = parent_total_blackeval / float(parentvisits);
if (color == FastBoard::WHITE) {
pure_eval = 1.0 - pure_eval;
}
if (!is_root || !cfg_noise) {
fpu_reduction = cfg_fpu_reduction * std::sqrt(total_visited_policy) * pure_eval / 0.5;
fpu_reduction = cfg_fpu_reduction * std::sqrt(total_visited_policy);
if (cfg_puct_factor == 2) {
fpu_reduction *= pure_eval * (1 - pure_eval) / 0.25;
} else if (cfg_puct_factor == 1) {
fpu_reduction *= pure_eval / 0.5;
}
}
// Estimated eval for unknown nodes = current parent winrate - reduction
auto fpu_eval = pure_eval - fpu_reduction;
Expand All @@ -291,7 +303,12 @@ UCTNode* UCTNode::uct_select_child(int color, bool is_root) {
}
auto psa = child.get_score();
auto denom = 1.0 + child.get_visits();
auto puct = cfg_puct * psa * (numerator / denom) * pure_eval / 0.5;
auto puct = cfg_puct * psa * (numerator / denom);
if (cfg_puct_factor == 2) {
puct *= pure_eval * (1 - pure_eval) / 0.25;
} else if (cfg_puct_factor == 1) {
puct *= pure_eval / 0.5;
}
auto value = winrate + puct;
assert(value > std::numeric_limits<double>::lowest());

Expand Down
5 changes: 5 additions & 0 deletions src/UCTNodePointer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,11 @@ float UCTNodePointer::get_eval(int tomove) const {
return read_ptr()->get_eval(tomove);
}

// Forward get_blackevals() to the node returned by read_ptr().
// Only valid on an inflated pointer; this precondition is asserted.
double UCTNodePointer::get_blackevals() const {
    assert(is_inflated());
    auto&& target = read_ptr();
    return target->get_blackevals();
}

int UCTNodePointer::get_move() const {
if (is_inflated()) return read_ptr()->get_move();
return read_vertex();
Expand Down
1 change: 1 addition & 0 deletions src/UCTNodePointer.h
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ class UCTNodePointer {
int get_move() const;
// this can only be called if it is an inflated pointer
float get_eval(int tomove) const;
double get_blackevals() const;
};

#endif
68 changes: 60 additions & 8 deletions src/UCTSearch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,15 @@
#include <cstddef>
#include <limits>
#include <memory>
#include <random>
#include <type_traits>

#include "FastBoard.h"
#include "FastState.h"
#include "FullBoard.h"
#include "GTP.h"
#include "GameState.h"
#include "Random.h"
#include "TimeControl.h"
#include "Timing.h"
#include "Training.h"
Expand Down Expand Up @@ -157,8 +159,19 @@ float UCTSearch::get_min_psa_ratio() const {
return 0.0f;
}

// Dynamically adjust the backup percentage according to the supplied
// black evaluation.
//  - blackeval not above 0.5: always 100.
//  - otherwise: cfg_backup_pct plus the remaining (100 - cfg_backup_pct)
//    scaled by 4 * blackeval * (1 - blackeval); that factor is 1 at 0.5
//    and falls to 0 as blackeval approaches 1.
float calc_backup_pct (float blackeval) {
    const bool black_ahead = blackeval > 0.5;
    if (!black_ahead) {
        return 100.0;
    }
    return (100.0 - cfg_backup_pct) * 4.0 * blackeval * (1 - blackeval) + cfg_backup_pct;
}

SearchResult UCTSearch::play_simulation(GameState & currstate,
UCTNode* const node) {
UCTNode* const node,
float backup_pct,
int depth) {
const auto color = currstate.get_to_move();
auto result = SearchResult{};

Expand All @@ -168,6 +181,7 @@ SearchResult UCTSearch::play_simulation(GameState & currstate,
if (currstate.get_passes() >= 2) {
auto score = currstate.final_score();
result = SearchResult::from_score(score);
node->update(result.eval());
} else if (m_nodes < MAX_TREE_SIZE) {
float eval;
const auto had_children = node->has_children();
Expand All @@ -179,6 +193,9 @@ SearchResult UCTSearch::play_simulation(GameState & currstate,
}
}
}
if (result.valid()) {
result.remaining_backups = - depth * (1.0 - backup_pct / 100.0);
}

if (node->has_children() && !result.valid()) {
auto next = node->uct_select_child(color, node == m_root.get());
Expand All @@ -188,13 +205,48 @@ SearchResult UCTSearch::play_simulation(GameState & currstate,
if (move != FastBoard::PASS && currstate.superko()) {
next->invalidate();
} else {
result = play_simulation(currstate, next);
if (backup_pct > 100.0) {
backup_pct = calc_backup_pct(node->get_pure_eval(FastBoard::BLACK));
}
result = play_simulation(currstate, next, backup_pct, depth + 1);
result.remaining_backups++;
if (result.valid()) {
if (color == FastBoard::BLACK || result.backup_type == 1 || node->get_visits() == 0) {
node->update(result.eval());
}
else if (result.backup_type == 2) {
if (cfg_pseudo_backup) {
node->update(node->get_pure_eval(FastBoard::BLACK));
}
}
else if (result.backup_type == 5) {
if (std::uniform_real_distribution<double>{ 0.0, 1.0 }(Random::get_Rng()) <= result.remaining_backups) {
node->update(result.eval());
}
else {
if (cfg_pseudo_backup) {
node->update(node->get_pure_eval(FastBoard::BLACK));
}
}
}
else if (std::uniform_real_distribution<double>{ 0.0, 100.0 }(Random::get_Rng()) <= backup_pct) {
node->update(result.eval());
if (result.backup_type == 3) {
result.backup_type = 1;
}
}
else {
if (cfg_pseudo_backup) {
node->update(node->get_pure_eval(FastBoard::BLACK));
}
if (result.backup_type == 0 || result.backup_type == 3) {
result.backup_type = 2;
}
}
}
}
}

if (result.valid()) {
node->update(result.eval());
}
node->virtual_loss_undo();

return result;
Expand Down Expand Up @@ -586,7 +638,7 @@ bool UCTSearch::stop_thinking(int elapsed_centis, int time_for_move) const {
void UCTWorker::operator()() {
do {
auto currstate = std::make_unique<GameState>(m_rootstate);
auto result = m_search->play_simulation(*currstate, m_root);
auto result = m_search->play_simulation(*currstate, m_root, 200.0, 0);
if (result.valid()) {
m_search->increment_playouts();
}
Expand Down Expand Up @@ -631,7 +683,7 @@ int UCTSearch::think(int color, passflag_t passflag) {
do {
auto currstate = std::make_unique<GameState>(m_rootstate);

auto result = play_simulation(*currstate, m_root.get());
auto result = play_simulation(*currstate, m_root.get(), 200.0, 0);
if (result.valid()) {
increment_playouts();
}
Expand Down Expand Up @@ -707,7 +759,7 @@ void UCTSearch::ponder() {
auto last_output = 0;
do {
auto currstate = std::make_unique<GameState>(m_rootstate);
auto result = play_simulation(*currstate, m_root.get());
auto result = play_simulation(*currstate, m_root.get(), 200.0, 0);
if (result.valid()) {
increment_playouts();
}
Expand Down
Loading