STEllAR-GROUP · hkaiser · Jan 29, 2024 · Jan 20, 2024
@@ -1,4 +1,4 @@
-# Copyright (c) 2017-2022 Hartmut Kaiser
+# Copyright (c) 2017-2024 Hartmut Kaiser
 #
 # SPDX-License-Identifier: BSL-1.0
 # Distributed under the Boost Software License, Version 1.0. (See accompanying
@@ -70,6 +70,8 @@ function(create_configuration_summary message module_name)
             PROPERTY VALUE
           )
           hpx_info("    ${_variableName}=${_value}")
+        else()
+          hpx_info("    value not found for ${_variableName}")
         endif()
 
         string(REPLACE "_WITH_" "_HAVE_" __variableName ${_variableName})

@@ -30,7 +30,7 @@ endif()
 # NLohnmann JSON can be installed by HPX or externally installed. In the first
 # case we use exported targets, in the second we find JSON again using
 # find_package.
-if(HPX_COMMAND_LINE_HANDLING_WITH_JSON_CONFIGURATION_FILES)
+if(HPX_COMMAND_LINE_HANDLING_LOCAL_WITH_JSON_CONFIGURATION_FILES)
   if(HPX_WITH_FETCH_JSON)
     include("${CMAKE_CURRENT_LIST_DIR}/HPXJsonTarget.cmake")
   else()

@@ -152,8 +152,9 @@ policy use the command line option
 Work requesting scheduling policies
 -----------------------------------
 
-* invoke using: :option:`--hpx:queuing`\ ``local-workrequesting-fifo``
-  or using :option:`--hpx:queuing`\ ``local-workrequesting-lifo``
+* invoke using: :option:`--hpx:queuing`\ ``local-workrequesting-fifo``,
+  using :option:`--hpx:queuing`\ ``local-workrequesting-lifo``,
+  or using :option:`--hpx:queuing`\ ``local-workrequesting-mc``
 
 The work-requesting policies rely on a different mechanism of balancing work
 between cores (compared to the other policies listed above). Instead of actively

@@ -1573,15 +1573,15 @@ The predefined command line options for any application using
    ``local-priority-fifo``, ``local-priority-lifo``, ``static``,
    ``static-priority``, ``abp-priority-fifo``,
    ``local-workrequesting-fifo``, ``local-workrequesting-lifo``
-   and ``abp-priority-lifo``
+   ``local-workrequesting-mc``, and ``abp-priority-lifo``
    (default: ``local-priority-fifo``).
 
 .. option:: --hpx:high-priority-threads arg
 
    The number of operating system threads maintaining a high priority queue
    (default: number of OS threads), valid for :option:`--hpx:queuing`\
-   ``=abp-priority``, :option:`--hpx:queuing`\ ``=static-priority`` and
-   :option:`--hpx:queuing`\ ``=local-priority`` only.
+   ``=abp-priority``, :option:`--hpx:queuing`\ ``static-priority`` and
+   :option:`--hpx:queuing`\ ``local-priority`` only.
 
 .. option:: --hpx:numa-sensitive
 

@@ -1,4 +1,4 @@
-//  Copyright (c) 2007-2023 Hartmut Kaiser
+//  Copyright (c) 2007-2024 Hartmut Kaiser
 //
 //  SPDX-License-Identifier: BSL-1.0
 //  Distributed under the Boost Software License, Version 1.0. (See accompanying
@@ -7,13 +7,11 @@
 #pragma once
 
 #include <hpx/config.hpp>
-#include <hpx/assert.hpp>
 #include <hpx/topology/topology.hpp>
 
 #include <atomic>
 #include <cstddef>
 #include <string>
-#include <utility>
 #include <vector>
 
 #include <hpx/config/warnings_prefix.hpp>
@@ -40,21 +38,12 @@ namespace hpx::threads::policies::detail {
             std::string const& affinity_description = "balanced",
             bool use_process_mask = false);
 
-        void set_num_threads(size_t num_threads) noexcept
-        {
-            num_threads_ = num_threads;
-        }
+        void set_num_threads(size_t num_threads) noexcept;
 
         void set_affinity_masks(
-            std::vector<threads::mask_type> const& affinity_masks)
-        {
-            affinity_masks_ = affinity_masks;
-        }
+            std::vector<threads::mask_type> const& affinity_masks);
         void set_affinity_masks(
-            std::vector<threads::mask_type>&& affinity_masks) noexcept
-        {
-            affinity_masks_ = HPX_MOVE(affinity_masks);
-        }
+            std::vector<threads::mask_type>&& affinity_masks) noexcept;
 
         constexpr std::size_t get_num_threads() const noexcept
         {
@@ -69,19 +58,9 @@ namespace hpx::threads::policies::detail {
         std::size_t get_thread_occupancy(
             threads::topology const& topo, std::size_t pu_num) const;
 
-        std::size_t get_pu_num(std::size_t num_thread) const noexcept
-        {
-            HPX_ASSERT(num_thread < pu_nums_.size());
-            return pu_nums_[num_thread];
-        }
-        void set_pu_nums(std::vector<std::size_t> const& pu_nums)
-        {
-            pu_nums_ = pu_nums;
-        }
-        void set_pu_nums(std::vector<std::size_t>&& pu_nums) noexcept
-        {
-            pu_nums_ = HPX_MOVE(pu_nums);
-        }
+        std::size_t get_pu_num(std::size_t num_thread) const noexcept;
+        void set_pu_nums(std::vector<std::size_t> const& pu_nums);
+        void set_pu_nums(std::vector<std::size_t>&& pu_nums) noexcept;
 
         void add_punit(std::size_t virt_core, std::size_t thread_num);
         void init_cached_pu_nums(std::size_t hardware_concurrency);

@@ -1,4 +1,4 @@
-//  Copyright (c) 2007-2023 Hartmut Kaiser
+//  Copyright (c) 2007-2024 Hartmut Kaiser
 //
 //  SPDX-License-Identifier: BSL-1.0
 //  Distributed under the Boost Software License, Version 1.0. (See accompanying
@@ -24,7 +24,7 @@ namespace hpx::threads::policies::detail {
         std::vector<mask_type> const& masks) noexcept
     {
         std::size_t count = 0;
-        for (mask_cref_type m : masks)
+        for (mask_cref_type const m : masks)
         {
             if (threads::any(m))
                 ++count;
@@ -38,6 +38,7 @@ namespace hpx::threads::policies::detail {
       , pu_step_(1)
       , used_cores_(0)
       , affinity_domain_("pu")
+      , no_affinity_()
       , use_process_mask_(false)
       , num_pus_needed_(0)
     {
@@ -122,7 +123,7 @@ namespace hpx::threads::policies::detail {
         }
         else if (pu_offset == static_cast<std::size_t>(-1))
         {
-            // calculate the pu offset based on the used cores, but only if its
+            // calculate the pu offset based on the used cores, but only if it's
             // not explicitly specified
             for (std::size_t num_core = 0; num_core != used_cores; ++num_core)
             {
@@ -154,6 +155,23 @@ namespace hpx::threads::policies::detail {
         num_pus_needed_ = (std::max)(num_unique_cores, max_cores);
     }
 
+    void affinity_data::set_num_threads(size_t num_threads) noexcept
+    {
+        num_threads_ = num_threads;
+    }
+
+    void affinity_data::set_affinity_masks(
+        std::vector<threads::mask_type> const& affinity_masks)
+    {
+        affinity_masks_ = affinity_masks;
+    }
+
+    void affinity_data::set_affinity_masks(
+        std::vector<threads::mask_type>&& affinity_masks) noexcept
+    {
+        affinity_masks_ = HPX_MOVE(affinity_masks);
+    }
+
     mask_type affinity_data::get_pu_mask(
         threads::topology const& topo, std::size_t global_thread_num) const
     {
@@ -181,16 +199,15 @@ namespace hpx::threads::policies::detail {
         }
         if (0 == std::string("core").find(affinity_domain_))
         {
-            // The affinity domain is 'core', return a bit mask corresponding
-            // to all processing units of the core containing the given
-            // pu_num.
+            // The affinity domain is 'core', return a bit mask corresponding to
+            // all processing units of the core containing the given pu_num.
             return topo.get_core_affinity_mask(pu_num);
         }
         if (0 == std::string("numa").find(affinity_domain_))
         {
-            // The affinity domain is 'numa', return a bit mask corresponding
-            // to all processing units of the NUMA domain containing the
-            // given pu_num.
+            // The affinity domain is 'numa', return a bit mask corresponding to
+            // all processing units of the NUMA domain containing the given
+            // pu_num.
             return topo.get_numa_node_affinity_mask(pu_num);
         }
 
@@ -219,7 +236,7 @@ namespace hpx::threads::policies::detail {
         for (std::size_t thread_num = 0; thread_num != num_threads_;
              ++thread_num)
         {
-            auto thread_mask = get_pu_mask(topo, thread_num);
+            auto const thread_mask = get_pu_mask(topo, thread_num);
             for (std::size_t i = 0; i != overall_threads; ++i)
             {
                 if (threads::test(thread_mask, i))
@@ -251,14 +268,31 @@ namespace hpx::threads::policies::detail {
             for (std::size_t num_thread = 0; num_thread != num_threads_;
                  ++num_thread)
             {
-                mask_cref_type affinity_mask = get_pu_mask(topo, num_thread);
+                mask_cref_type const affinity_mask =
+                    get_pu_mask(topo, num_thread);
                 if (threads::any(pu_mask & affinity_mask))
                     ++count;
             }
         }
         return count;
     }
 
+    std::size_t affinity_data::get_pu_num(std::size_t num_thread) const noexcept
+    {
+        HPX_ASSERT(num_thread < pu_nums_.size());
+        return pu_nums_[num_thread];
+    }
+
+    void affinity_data::set_pu_nums(std::vector<std::size_t> const& pu_nums)
+    {
+        pu_nums_ = pu_nums;
+    }
+
+    void affinity_data::set_pu_nums(std::vector<std::size_t>&& pu_nums) noexcept
+    {
+        pu_nums_ = HPX_MOVE(pu_nums);
+    }
+
     // means of adding a processing unit after initialization
     void affinity_data::add_punit(std::size_t virt_core, std::size_t thread_num)
     {
@@ -309,17 +343,16 @@ namespace hpx::threads::policies::detail {
         // The distance between assigned processing units shouldn't be zero
         HPX_ASSERT(pu_step_ > 0 && pu_step_ <= hardware_concurrency);
 
-        // We 'scale' the thread number to compute the corresponding
-        // processing unit number.
+        // We 'scale' the thread number to compute the corresponding processing
+        // unit number.
         //
-        // The base line processing unit number is computed from the given
+        // The baseline processing unit number is computed from the given
         // pu-offset and pu-step.
         std::size_t const num_pu = pu_offset_ + pu_step_ * num_thread;
 
-        // We add an additional offset, which allows to 'roll over' if the
-        // pu number would get larger than the number of available
-        // processing units. Note that it does not make sense to 'roll over'
-        // farther than the given pu-step.
+        // We add an offset, which allows to 'roll over' if the pu number would
+        // get larger than the number of available processing units. Note that
+        // it does not make sense to 'roll over' farther than the given pu-step.
         std::size_t const offset = (num_pu / hardware_concurrency) % pu_step_;
 
         // The resulting pu number has to be smaller than the available

@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2023 The STE||AR-Group
+# Copyright (c) 2019-2024 The STE||AR-Group
 #
 # SPDX-License-Identifier: BSL-1.0
 # Distributed under the Boost Software License, Version 1.0. (See accompanying
@@ -9,17 +9,25 @@ cmake_minimum_required(VERSION 3.14 FATAL_ERROR)
 list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
 
 # Enable reading JSON formatted configuration files on the command line
+set(HPX_COMMAND_LINE_HANDLING_WITH_JSON_CONFIGURATION_FILES_DEFAULT OFF)
+if(HPX_COMMAND_LINE_HANDLING_WITH_JSON_CONFIGURATION_FILES)
+  set(HPX_COMMAND_LINE_HANDLING_WITH_JSON_CONFIGURATION_FILES_DEFAULT
+      ${HPX_COMMAND_LINE_HANDLING_WITH_JSON_CONFIGURATION_FILES}
+  )
+endif()
+
 hpx_option(
-  HPX_COMMAND_LINE_HANDLING_WITH_JSON_CONFIGURATION_FILES
+  HPX_COMMAND_LINE_HANDLING_LOCAL_WITH_JSON_CONFIGURATION_FILES
   BOOL
-  "Enable reading JSON formatted configuration files on the command line. (default: OFF)"
-  OFF
+  "Enable reading JSON formatted configuration files on the command line.\n
+  (default: ${HPX_COMMAND_LINE_HANDLING_WITH_JSON_CONFIGURATION_FILES_DEFAULT})"
+  ${HPX_COMMAND_LINE_HANDLING_WITH_JSON_CONFIGURATION_FILES_DEFAULT}
   ADVANCED
   CATEGORY "Modules"
   MODULE COMMAND_LINE_HANDLING_LOCAL
 )
 
-if(HPX_COMMAND_LINE_HANDLING_WITH_JSON_CONFIGURATION_FILES)
+if(HPX_COMMAND_LINE_HANDLING_LOCAL_WITH_JSON_CONFIGURATION_FILES)
   hpx_add_config_define_namespace(
     DEFINE HPX_COMMAND_LINE_HANDLING_HAVE_JSON_CONFIGURATION_FILES
     NAMESPACE COMMAND_LINE_HANDLING_LOCAL
@@ -37,7 +45,7 @@ set(command_line_handling_local_sources
     parse_command_line_local.cpp
 )
 
-if(HPX_COMMAND_LINE_HANDLING_WITH_JSON_CONFIGURATION_FILES)
+if(HPX_COMMAND_LINE_HANDLING_LOCAL_WITH_JSON_CONFIGURATION_FILES)
   include(HPX_SetupJSON)
   set(command_line_handling_local_dependencies Json::json)
 

@@ -476,12 +476,16 @@ namespace hpx::local::detail {
                     "larger than number of threads (--hpx:threads)");
             }
 
-            if (!(queuing_ == "local-priority" || queuing_ == "abp-priority"))
+            if (!(queuing_ == "local-priority" || queuing_ == "abp-priority" ||
+                    queuing_.find("local-workrequesting") != 0))
             {
                 throw hpx::detail::command_line_error(
                     "Invalid command line option --hpx:high-priority-threads, "
-                    "valid for --hpx:queuing=local-priority and "
-                    "--hpx:queuing=abp-priority only");
+                    "valid for --hpx:queuing=local-priority, "
+                    "--hpx:queuing=local-workrequesting-fifo, "
+                    "--hpx:queuing=local-workrequesting-lifo, "
+                    "--hpx:queuing=local-workrequesting-mc, "
+                    "and --hpx:queuing=abp-priority only");
             }
 
             ini_config.emplace_back("hpx.thread_queue.high_priority_queues!=" +

@@ -480,6 +480,7 @@ namespace hpx::local::detail {
                 "--hpx:queuing=static, --hpx:queuing=static-priority, "
                 "--hpx:queuing=local-workrequesting-fifo, "
                 "--hpx:queuing=local-workrequesting-lifo, "
+                "--hpx:queuing=local-workrequesting-mc, "
                 "and --hpx:queuing=local-priority only")
             ("hpx:pu-step", value<std::size_t>(),
                 "the step between used processing unit numbers for this "
@@ -488,6 +489,7 @@ namespace hpx::local::detail {
                 "--hpx:queuing=static, --hpx:queuing=static-priority "
                 "--hpx:queuing=local-workrequesting-fifo, "
                 "--hpx:queuing=local-workrequesting-lifo, "
+                "--hpx:queuing=local-workrequesting-mc, "
                 "and --hpx:queuing=local-priority only")
             ("hpx:affinity", value<std::string>(),
                 "the affinity domain the OS threads will be confined to, "
@@ -496,6 +498,7 @@ namespace hpx::local::detail {
                 "--hpx:queuing=static, --hpx:queuing=static-priority "
                 "--hpx:queuing=local-workrequesting-fifo, "
                 "--hpx:queuing=local-workrequesting-lifo, "
+                "--hpx:queuing=local-workrequesting-mc, "
                 " and --hpx:queuing=local-priority only")
             ("hpx:bind", value<std::vector<std::string> >()->composing(),
                 "the detailed affinity description for the OS threads, see "
@@ -515,21 +518,23 @@ namespace hpx::local::detail {
                 "each processing unit")
             ("hpx:cores", value<std::string>(),
                 "the number of cores to utilize for this HPX "
-                "locality (default: 'all', i.e. the number of cores is based on "
-                "the number of total cores in the system)")
+                "locality (default: 'all', i.e. the number of cores is based "
+                "on the number of total cores in the system)")
             ("hpx:queuing", value<std::string>(),
                 "the queue scheduling policy to use, options are "
                 "'local', 'local-priority-fifo','local-priority-lifo', "
                 "'abp-priority-fifo', 'abp-priority-lifo', 'static', "
-                "'static-priority', 'local-workrequesting-fifo', and "
-                "'local-workrequesting-lifo' (default: 'local-priority'; "
-                "all option values can be abbreviated)")
+                "'static-priority', 'local-workrequesting-fifo',"
+                "'local-workrequesting-lifo', and 'local-workrequesting-mc' "
+                "(default: 'local-priority'; all option values can be "
+                "abbreviated)")
             ("hpx:high-priority-threads", value<std::size_t>(),
                 "the number of operating system threads maintaining a high "
                 "priority queue (default: number of OS threads), valid for "
                 "--hpx:queuing=local-priority,--hpx:queuing=static-priority, "
                 "--hpx:queuing=local-workrequesting-fifo, "
                 "--hpx:queuing=local-workrequesting-lifo, "
+                "--hpx:queuing=local-workrequesting-mc, "
                 " and --hpx:queuing=abp-priority only)")
             ("hpx:numa-sensitive", value<std::size_t>()->implicit_value(0),
                 "makes the local-priority scheduler NUMA sensitive ("

@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2023 The STE||AR-Group
+# Copyright (c) 2020-2024 The STE||AR-Group
 #               2011 Bryce Adelstein-Lelbach
 #
 # SPDX-License-Identifier: BSL-1.0
@@ -7,7 +7,7 @@
 
 set(tests)
 
-if(HPX_COMMAND_LINE_HANDLING_WITH_JSON_CONFIGURATION_FILES)
+if(HPX_COMMAND_LINE_HANDLING_LOCAL_WITH_JSON_CONFIGURATION_FILES)
   set(tests json_config_file)
 
   set(json_config_file_PARAMETERS