alreadydone · alreadydone · Dec 18, 2018 · Dec 19, 2018 · Dec 19, 2018
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -18,7 +18,7 @@ set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake/Modules/")
 include(GNUInstallDirs)
 
 project(leelaz)
-add_subdirectory(gtest EXCLUDE_FROM_ALL) # We don't want to install gtest, exclude it from `all`
+#add_subdirectory(gtest EXCLUDE_FROM_ALL) # We don't want to install gtest, exclude it from `all`. Disabled here together with the gtest-based `tests` target further down.
 
 # Required Packages
 set(Boost_MIN_VERSION "1.58.0")
@@ -33,7 +33,8 @@ find_package(OpenCL REQUIRED)
 if(NOT APPLE)
   set(BLA_VENDOR OpenBLAS)
 endif()
-if(USE_BLAS)
+
+#if(USE_BLAS)  # guard disabled: the BLAS lookup below now runs unconditionally
   message(STATUS "Looking for system BLAS/OpenBLAS library.")
   find_package(BLAS REQUIRED)
   find_path(BLAS_INCLUDE_DIRS openblas_config.h
@@ -44,9 +45,10 @@ if(USE_BLAS)
     /usr/include/x86_64-linux-gnu
     $ENV{BLAS_HOME}/include)
     add_definitions(-DUSE_BLAS)
-else()
-message(STATUS "Using built-in matrix library.")
-endif()
+  message( STATUS "BLAS found: ${BLAS_LIBRARIES}" )
+#else()  # built-in matrix library fallback disabled together with the if(USE_BLAS) guard
+#message(STATUS "Using built-in matrix library.")
+#endif()
 find_package(Qt5Core)
 
 set(CMAKE_CXX_STANDARD 14)
@@ -130,18 +132,18 @@ else()
 endif()
 
 # Google Test below
-file(GLOB tests_SRC "${SrcPath}/tests/*.cpp")
-
-add_executable(tests ${tests_SRC} $<TARGET_OBJECTS:objs>)
-if(GccSpecificFlags)
-  target_compile_options(tests PRIVATE "-Wno-unused-variable")
-endif()
-
-target_link_libraries(tests ${Boost_LIBRARIES})
-target_link_libraries(tests ${BLAS_LIBRARIES})
-target_link_libraries(tests ${OpenCL_LIBRARIES})
-target_link_libraries(tests ${ZLIB_LIBRARIES})
-target_link_libraries(tests gtest_main ${CMAKE_THREAD_LIBS_INIT})
+#file(GLOB tests_SRC "${SrcPath}/tests/*.cpp")  # `tests` target disabled from here down; the gtest subdirectory is no longer added
+
+#add_executable(tests ${tests_SRC} $<TARGET_OBJECTS:objs>)
+#if(GccSpecificFlags)
+#  target_compile_options(tests PRIVATE "-Wno-unused-variable")
+#endif()
+
+#target_link_libraries(tests ${Boost_LIBRARIES})
+#target_link_libraries(tests ${BLAS_LIBRARIES})
+#target_link_libraries(tests ${OpenCL_LIBRARIES})
+#target_link_libraries(tests ${ZLIB_LIBRARIES})
+#target_link_libraries(tests gtest_main ${CMAKE_THREAD_LIBS_INIT})
 
 include(GetGitRevisionDescription)
 git_describe(VERSION --tags)

diff --git a/autogtp/README.md b/autogtp/README.md
@@ -33,7 +33,7 @@ directly.
 Copy the compiled leelaz binary into the autogtp directory, and run
 autogtp.
 
-    cp ../build/leelaz .
+    cp ../src/leelaz .
     ./autogtp
 
 While autogtp is running, typing q+Enter will save the processed data and exit. When autogtp runs next, autogtp will continue the game.
diff --git a/src/CPUPipe.cpp b/src/CPUPipe.cpp
@@ -291,16 +291,16 @@ void CPUPipe::winograd_convolve3(const int outputs,
 
 template<unsigned int filter_size>
 void convolve(const size_t outputs,
+              const size_t biases_size,  // replaces the former `biases` vector; used below to derive input_channels
               const std::vector<float>& input,
               const std::vector<float>& weights,
-              const std::vector<float>& biases,
               std::vector<float>& output) {
     // The size of the board is defined at compile time
     constexpr unsigned int width = BOARD_SIZE;
     constexpr unsigned int height = BOARD_SIZE;
     constexpr auto num_intersections = width * height;
     constexpr auto filter_len = filter_size * filter_size;
-    const auto input_channels = weights.size() / (biases.size() * filter_len);
+    const auto input_channels = weights.size() / (biases_size * filter_len);  // assumes weights.size() == biases_size * input_channels * filter_len -- TODO confirm
     const auto filter_dim = filter_len * input_channels;
     assert(outputs * num_intersections == output.size());
 
@@ -333,41 +333,59 @@ void convolve(const size_t outputs,
         * ConstEigenMatrixMap<float>(weights.data(), filter_dim, outputs);
 #endif
 
+    /* NOTE(review): per-output bias addition is disabled and `biases` is no longer a parameter; presumably the biases are applied elsewhere -- confirm.
     for (unsigned int o = 0; o < outputs; o++) {
         for (unsigned int b = 0; b < num_intersections; b++) {
             output[(o * num_intersections) + b] += biases[o];
         }
-    }
+    }*/
 }
 
 template <size_t spatial_size>
 void batchnorm(const size_t channels,
                std::vector<float>& data,
                const float* const means,
                const float* const stddevs,
+               const float* const gammas,  // per-channel multiplier, read as gammas[c]
+               const float* const betas,   // per-channel offset, read as betas[c]
                const float* const eltwise = nullptr) {
     const auto lambda_ReLU = [](const auto val) { return (val > 0.0f) ?
                                                           val : 0.0f; };
     for (auto c = size_t{0}; c < channels; ++c) {
         const auto mean = means[c];
         const auto scale_stddev = stddevs[c];
+        const auto gamma = gammas[c];  // multiplies the mean-centered, stddev-scaled value
+        const auto beta = betas[c];    // added after the gamma multiplication
         const auto arr = &data[c * spatial_size];
 
         if (eltwise == nullptr) {
             // Classical BN
             for (auto b = size_t{0}; b < spatial_size; b++) {
-                arr[b] = lambda_ReLU(scale_stddev * (arr[b] - mean));
+                arr[b] = lambda_ReLU(scale_stddev * (arr[b] - mean) * gamma + beta);  // gamma/beta are applied before the ReLU
             }
         } else {
             // BN + residual add
             const auto res = &eltwise[c * spatial_size];
             for (auto b = size_t{0}; b < spatial_size; b++) {
-                arr[b] = lambda_ReLU((scale_stddev * (arr[b] - mean)) + res[b]);
+                arr[b] = lambda_ReLU((scale_stddev * (arr[b] - mean) * gamma + beta) + res[b]);  // residual is added after gamma/beta, before the ReLU
             }
         }
     }
 }
 
+// Adds bias[c], in place, to each of the spatial_size values of channel c.
+template <size_t spatial_size>
+void add_bias(const size_t channels,
+              std::vector<float>& data,
+              const float* const bias) {
+    for (auto c = size_t{0}; c < channels; ++c) {
+        auto* const plane = &data[c * spatial_size];
+        for (auto* value = plane; value != plane + spatial_size; ++value) {
+            *value += bias[c];
+        }
+    }
+}
+
 void CPUPipe::forward0(std::unique_ptr<const std::vector<float>> input,
                        const int tomove,
                        const int symmetry,
@@ -391,33 +409,47 @@ void CPUPipe::forward(const std::vector<float>& input,
     auto M = std::vector<float>(WINOGRAD_TILE * output_channels * P);
 
     winograd_convolve3(output_channels, input, m_weights->m_conv_weights[0], V, M, conv_out);
-    batchnorm<NUM_INTERSECTIONS>(output_channels, conv_out,
-                                 m_weights->m_batchnorm_means[0].data(),
-                                 m_weights->m_batchnorm_stddevs[0].data());
+    add_bias<NUM_INTERSECTIONS>(output_channels, conv_out, m_weights->m_conv_biases[0].data());
 
     // Residual tower
     auto conv_in = std::vector<float>(output_channels * NUM_INTERSECTIONS);
     auto res = std::vector<float>(output_channels * NUM_INTERSECTIONS);
     for (auto i = size_t{1}; i < m_weights->m_conv_weights.size(); i += 2) {
         auto output_channels = m_input_channels;
         std::swap(conv_out, conv_in);
+        res = conv_in;  // copy of the block input, added back to conv_out at the end of the iteration
+        batchnorm<NUM_INTERSECTIONS>(output_channels, conv_in,
+                                     m_weights->m_batchnorm_means[i - 1].data(),
+                                     m_weights->m_batchnorm_stddevs[i - 1].data(),
+                                     m_weights->m_batchnorm_gammas[i - 1].data(),
+                                     m_weights->m_batchnorm_betas[i - 1].data());  // applied to conv_in before the convolution below
         winograd_convolve3(output_channels, conv_in,
                            m_weights->m_conv_weights[i], V, M, conv_out);
-        batchnorm<NUM_INTERSECTIONS>(output_channels, conv_out,
-                                     m_weights->m_batchnorm_means[i].data(),
-                                     m_weights->m_batchnorm_stddevs[i].data());
+        add_bias<NUM_INTERSECTIONS>(output_channels, conv_out, m_weights->m_conv_biases[i].data());
 
-        std::swap(conv_in, res);
         std::swap(conv_out, conv_in);
+        batchnorm<NUM_INTERSECTIONS>(output_channels, conv_in,
+                                     m_weights->m_batchnorm_means[i].data(),
+                                     m_weights->m_batchnorm_stddevs[i].data(),
+                                     m_weights->m_batchnorm_gammas[i].data(),
+                                     m_weights->m_batchnorm_betas[i].data());
+
         winograd_convolve3(output_channels, conv_in,
                            m_weights->m_conv_weights[i + 1], V, M, conv_out);
-        batchnorm<NUM_INTERSECTIONS>(output_channels, conv_out,
-                                     m_weights->m_batchnorm_means[i + 1].data(),
-                                     m_weights->m_batchnorm_stddevs[i + 1].data(),
-                                     res.data());
+        add_bias<NUM_INTERSECTIONS>(output_channels, conv_out, m_weights->m_conv_biases[i + 1].data());
+
+        for (auto k = size_t{0}; k < conv_out.size(); ++k)
+            conv_out[k] += res[k];  // element-wise add of the saved block input; assumes res and conv_out have equal size -- TODO confirm
     }
-    convolve<1>(Network::OUTPUTS_POLICY, conv_out, m_conv_pol_w, m_conv_pol_b, output_pol);
-    convolve<1>(Network::OUTPUTS_VALUE, conv_out, m_conv_val_w, m_conv_val_b, output_val);
+
+    batchnorm<NUM_INTERSECTIONS>(output_channels, conv_out,
+                                 m_weights->m_batchnorm_means.back().data(),
+                                 m_weights->m_batchnorm_stddevs.back().data(),
+                                 m_weights->m_batchnorm_gammas.back().data(),
+                                 m_weights->m_batchnorm_betas.back().data());  // last parameter set, applied once after the tower loop
+
+    convolve<1>(Network::OUTPUTS_POLICY, m_conv_pol_b.size(), conv_out, m_conv_pol_w, output_pol);  // only the bias count is passed
+    convolve<1>(Network::OUTPUTS_VALUE, m_conv_val_b.size(), conv_out, m_conv_val_w, output_val);   // only the bias count is passed
 }
 
 void CPUPipe::push_weights(unsigned int /*filter_size*/,

diff --git a/src/ForwardPipe.h b/src/ForwardPipe.h
@@ -38,6 +38,8 @@ class ForwardPipe {
         std::vector<std::vector<float>> m_conv_biases;
         std::vector<std::vector<float>> m_batchnorm_means;
         std::vector<std::vector<float>> m_batchnorm_stddevs;
+        std::vector<std::vector<float>> m_batchnorm_gammas;  // batchnorm multipliers, one vector per layer index
+        std::vector<std::vector<float>> m_batchnorm_betas;   // batchnorm offsets, one vector per layer index
 
         // Policy head
         std::vector<float> m_conv_pol_w;

diff --git a/src/GTP.cpp b/src/GTP.cpp
@@ -133,7 +133,7 @@ void GTP::setup_default_parameters() {
     cfg_precision = precision_t::HALF;
 #endif
 #endif
-    cfg_puct = 0.8f;
+    cfg_puct = 1.25f;
     cfg_softmax_temp = 1.0f;
     cfg_fpu_reduction = 0.25f;
     // see UCTSearch::should_resign
@@ -644,7 +644,7 @@ void GTP::execute(GameState & game, const std::string& xinput) {
         std::string vertex = game.move_to_text(move);
         myprintf("%s\n", vertex.c_str());
         return;
-    } else if (command.find("heatmap") == 0) {
+    } else if (command.find("heatmap") == 0) {
         std::istringstream cmdstream(command);
         std::string tmp;
         std::string symmetry;