Skip to content

Commit

Permalink
Exclude low visit moves from random selection.
Browse files Browse the repository at this point in the history
Right now the randomized move selection will still pick moves that only
got a single visit. That means that no matter how badly they are
evaluated, we still have a fair chance of playing them. This means our
learning cycle can't avoid some really bad blunders.

There's some indication that using FPU=parent causes these blunders to
keep happening when ahead, as opposed to the AGZ method of using
FPU=0.5, which ends up filtering them. Because we really want to keep
FPU=parent, filter moves with only a single visit to get a similar
effect, i.e. to not pick moves with a really bad evaluation.

This defaults the minimum visits to 1, and also adds a configurable
temperature to move selection, defaulted at t=1.

Pull request leela-zero#1376.
  • Loading branch information
gcp authored May 9, 2018
1 parent cab4e92 commit 9f5c8d2
Show file tree
Hide file tree
Showing 4 changed files with 75 additions and 21 deletions.
4 changes: 4 additions & 0 deletions src/GTP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ int cfg_lagbuffer_cs;
int cfg_resignpct;
int cfg_noise;
int cfg_random_cnt;
int cfg_random_min_visits;
float cfg_random_temp;
std::uint64_t cfg_rng_seed;
bool cfg_dumbpass;
#ifdef USE_OPENCL
Expand Down Expand Up @@ -99,6 +101,8 @@ void GTP::setup_default_parameters() {
cfg_resignpct = -1;
cfg_noise = false;
cfg_random_cnt = 0;
cfg_random_min_visits = 1;
cfg_random_temp = 1.0f;
cfg_dumbpass = false;
cfg_logfile_handle = nullptr;
cfg_quiet = false;
Expand Down
2 changes: 2 additions & 0 deletions src/GTP.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ extern int cfg_lagbuffer_cs;
extern int cfg_resignpct;
extern int cfg_noise;
extern int cfg_random_cnt;
extern int cfg_random_min_visits;
extern float cfg_random_temp;
extern std::uint64_t cfg_rng_seed;
extern bool cfg_dumbpass;
#ifdef USE_OPENCL
Expand Down
66 changes: 50 additions & 16 deletions src/Leela.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,8 @@ static void license_blurb() {
static void parse_commandline(int argc, char *argv[]) {
namespace po = boost::program_options;
// Declare the supported options.
po::options_description v_desc("Allowed options");
v_desc.add_options()
po::options_description gen_desc("Generic options");
gen_desc.add_options()
("help,h", "Show commandline options.")
("gtp,g", "Enable GTP mode.")
("threads,t", po::value<int>()->default_value(cfg_num_threads),
Expand All @@ -63,46 +63,72 @@ static void parse_commandline(int argc, char *argv[]) {
"Requires --noponder.")
("visits,v", po::value<int>(),
"Weaken engine by limiting the number of visits.")
("timemanage", po::value<std::string>()->default_value("auto"),
"[auto|on|off|fast] Enable time management features.\n"
"auto = off when using -m, otherwise on")
("lagbuffer,b", po::value<int>()->default_value(cfg_lagbuffer_cs),
"Safety margin for time usage in centiseconds.")
("resignpct,r", po::value<int>()->default_value(cfg_resignpct),
"Resign when winrate is less than x%.\n"
"-1 uses 10% but scales for handicap.")
("randomcnt,m", po::value<int>()->default_value(cfg_random_cnt),
"Play more randomly the first x moves.")
("noise,n", "Enable policy network randomization.")
("seed,s", po::value<std::uint64_t>(),
"Random number generation seed.")
("dumbpass,d", "Don't use heuristics for smarter passing.")
("weights,w", po::value<std::string>(), "File with network weights.")
("logfile,l", po::value<std::string>(), "File to log input/output to.")
("quiet,q", "Disable all diagnostic output.")
("timemanage", po::value<std::string>()->default_value("auto"),
"[auto|on|off|fast] Enable time management features.\n"
"auto = off when using -m, otherwise on")
("noponder", "Disable thinking on opponent's time.")
("benchmark", "Test network and exit. Default args:\n-v3200 --noponder "
"-m0 -t1 -s1.")
;
#ifdef USE_OPENCL
po::options_description gpu_desc("GPU options");
gpu_desc.add_options()
("gpu", po::value<std::vector<int> >(),
"ID of the OpenCL device(s) to use (disables autodetection).")
("full-tuner", "Try harder to find an optimal OpenCL tuning.")
("tune-only", "Tune OpenCL only and then exit.")
;
#endif
po::options_description selfplay_desc("Self-play options");
selfplay_desc.add_options()
("noise,n", "Enable policy network randomization.")
("seed,s", po::value<std::uint64_t>(),
"Random number generation seed.")
("dumbpass,d", "Don't use heuristics for smarter passing.")
("randomcnt,m", po::value<int>()->default_value(cfg_random_cnt),
"Play more randomly the first x moves.")
("randomvisits",
po::value<int>()->default_value(cfg_random_min_visits),
"Don't play random moves if they have <= x visits.")
("randomtemp",
po::value<float>()->default_value(cfg_random_temp),
"Temperature to use for random move selection.")
;
#ifdef USE_TUNER
po::options_description tuner_desc("Tuning options");
tuner_desc.add_options()
("puct", po::value<float>())
("softmax_temp", po::value<float>())
("fpu_reduction", po::value<float>())
#endif
;
#endif
// These won't be shown, we use them to catch incorrect usage of the
// command line.
po::options_description h_desc("Hidden options");
h_desc.add_options()
("arguments", po::value<std::vector<std::string>>());
po::options_description visible;
visible.add(gen_desc)
#ifdef USE_OPENCL
.add(gpu_desc)
#endif
.add(selfplay_desc)
#ifdef USE_TUNER
.add(tuner_desc);
#else
;
#endif
// Parse both the above, we will check if any of the latter are present.
po::options_description all("All options");
all.add(v_desc).add(h_desc);
po::options_description all;
all.add(visible).add(h_desc);
po::positional_options_description p_desc;
p_desc.add("arguments", -1);
po::variables_map vm;
Expand All @@ -113,7 +139,7 @@ static void parse_commandline(int argc, char *argv[]) {
} catch(const boost::program_options::error& e) {
printf("ERROR: %s\n", e.what());
license_blurb();
std::cout << v_desc << std::endl;
std::cout << visible << std::endl;
exit(EXIT_FAILURE);
}

Expand All @@ -129,7 +155,7 @@ static void parse_commandline(int argc, char *argv[]) {
ev = EXIT_FAILURE;
}
license_blurb();
std::cout << v_desc << std::endl;
std::cout << visible << std::endl;
exit(ev);
}

Expand Down Expand Up @@ -233,6 +259,14 @@ static void parse_commandline(int argc, char *argv[]) {
cfg_random_cnt = vm["randomcnt"].as<int>();
}

if (vm.count("randomvisits")) {
cfg_random_min_visits = vm["randomvisits"].as<int>();
}

if (vm.count("randomtemp")) {
cfg_random_temp = vm["randomtemp"].as<float>();
}

if (vm.count("timemanage")) {
auto tm = vm["timemanage"].as<std::string>();
if (tm == "auto") {
Expand Down
24 changes: 19 additions & 5 deletions src/UCTNodeRoot.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -102,14 +102,28 @@ void UCTNode::dirichlet_noise(float epsilon, float alpha) {
}

void UCTNode::randomize_first_proportionally() {
auto accum = std::uint64_t{0};
auto accum_vector = std::vector<decltype(accum)>{};
auto accum = 0.0;
auto norm_factor = 0.0;
auto accum_vector = std::vector<double>{};

for (const auto& child : m_children) {
accum += child->get_visits();
accum_vector.emplace_back(accum);
auto visits = child->get_visits();
if (norm_factor == 0.0) {
norm_factor = visits;
// Nonsensical options? End of game?
if (visits <= cfg_random_min_visits) {
return;
}
}
if (visits > cfg_random_min_visits) {
accum += std::pow(visits / norm_factor,
1.0 / cfg_random_temp);
accum_vector.emplace_back(accum);
}
}

auto pick = Random::get_Rng().randuint64(accum);
auto distribution = std::uniform_real_distribution<double>{0.0, accum};
auto pick = distribution(Random::get_Rng());
auto index = size_t{0};
for (size_t i = 0; i < accum_vector.size(); i++) {
if (pick < accum_vector[i]) {
Expand Down

0 comments on commit 9f5c8d2

Please sign in to comment.