diff --git a/cmake/HPX_PrintSummary.cmake b/cmake/HPX_PrintSummary.cmake index 8c5b3811d5fd..1ee832edd598 100644 --- a/cmake/HPX_PrintSummary.cmake +++ b/cmake/HPX_PrintSummary.cmake @@ -1,4 +1,4 @@ -# Copyright (c) 2017-2022 Hartmut Kaiser +# Copyright (c) 2017-2024 Hartmut Kaiser # # SPDX-License-Identifier: BSL-1.0 # Distributed under the Boost Software License, Version 1.0. (See accompanying @@ -70,6 +70,8 @@ function(create_configuration_summary message module_name) PROPERTY VALUE ) hpx_info(" ${_variableName}=${_value}") + else() + hpx_info(" value not found for ${_variableName}") endif() string(REPLACE "_WITH_" "_HAVE_" __variableName ${_variableName}) diff --git a/cmake/templates/HPXConfig.cmake.in b/cmake/templates/HPXConfig.cmake.in index c3f0ae31b8f7..5c2c7260f96e 100644 --- a/cmake/templates/HPXConfig.cmake.in +++ b/cmake/templates/HPXConfig.cmake.in @@ -30,7 +30,7 @@ endif() # NLohnmann JSON can be installed by HPX or externally installed. In the first # case we use exported targets, in the second we find JSON again using # find_package. 
-if(HPX_COMMAND_LINE_HANDLING_WITH_JSON_CONFIGURATION_FILES) +if(HPX_COMMAND_LINE_HANDLING_LOCAL_WITH_JSON_CONFIGURATION_FILES) if(HPX_WITH_FETCH_JSON) include("${CMAKE_CURRENT_LIST_DIR}/HPXJsonTarget.cmake") else() diff --git a/docs/sphinx/manual/hpx_runtime_and_resources.rst b/docs/sphinx/manual/hpx_runtime_and_resources.rst index 195427972ed9..1a879e5abab4 100644 --- a/docs/sphinx/manual/hpx_runtime_and_resources.rst +++ b/docs/sphinx/manual/hpx_runtime_and_resources.rst @@ -152,8 +152,9 @@ policy use the command line option Work requesting scheduling policies ----------------------------------- -* invoke using: :option:`--hpx:queuing`\ ``local-workrequesting-fifo`` - or using :option:`--hpx:queuing`\ ``local-workrequesting-lifo`` +* invoke using: :option:`--hpx:queuing`\ ``local-workrequesting-fifo``, + using :option:`--hpx:queuing`\ ``local-workrequesting-lifo``, + or using :option:`--hpx:queuing`\ ``local-workrequesting-mc`` The work-requesting policies rely on a different mechanism of balancing work between cores (compared to the other policies listed above). Instead of actively diff --git a/docs/sphinx/manual/launching_and_configuring_hpx_applications.rst b/docs/sphinx/manual/launching_and_configuring_hpx_applications.rst index 2b92572f4e10..20d7348e7644 100644 --- a/docs/sphinx/manual/launching_and_configuring_hpx_applications.rst +++ b/docs/sphinx/manual/launching_and_configuring_hpx_applications.rst @@ -1573,15 +1573,15 @@ The predefined command line options for any application using ``local-priority-fifo``, ``local-priority-lifo``, ``static``, ``static-priority``, ``abp-priority-fifo``, ``local-workrequesting-fifo``, ``local-workrequesting-lifo`` - and ``abp-priority-lifo`` + ``local-workrequesting-mc``, and ``abp-priority-lifo`` (default: ``local-priority-fifo``). .. 
option:: --hpx:high-priority-threads arg The number of operating system threads maintaining a high priority queue (default: number of OS threads), valid for :option:`--hpx:queuing`\ - ``=abp-priority``, :option:`--hpx:queuing`\ ``=static-priority`` and - :option:`--hpx:queuing`\ ``=local-priority`` only. + ``=abp-priority``, :option:`--hpx:queuing`\ ``static-priority`` and + :option:`--hpx:queuing`\ ``local-priority`` only. .. option:: --hpx:numa-sensitive diff --git a/libs/core/affinity/include/hpx/affinity/affinity_data.hpp b/libs/core/affinity/include/hpx/affinity/affinity_data.hpp index bc3ce2524f2b..02a5a2291a1c 100644 --- a/libs/core/affinity/include/hpx/affinity/affinity_data.hpp +++ b/libs/core/affinity/include/hpx/affinity/affinity_data.hpp @@ -1,4 +1,4 @@ -// Copyright (c) 2007-2023 Hartmut Kaiser +// Copyright (c) 2007-2024 Hartmut Kaiser // // SPDX-License-Identifier: BSL-1.0 // Distributed under the Boost Software License, Version 1.0. (See accompanying @@ -7,13 +7,11 @@ #pragma once #include -#include #include #include #include #include -#include #include #include @@ -40,21 +38,12 @@ namespace hpx::threads::policies::detail { std::string const& affinity_description = "balanced", bool use_process_mask = false); - void set_num_threads(size_t num_threads) noexcept - { - num_threads_ = num_threads; - } + void set_num_threads(size_t num_threads) noexcept; void set_affinity_masks( - std::vector const& affinity_masks) - { - affinity_masks_ = affinity_masks; - } + std::vector const& affinity_masks); void set_affinity_masks( - std::vector&& affinity_masks) noexcept - { - affinity_masks_ = HPX_MOVE(affinity_masks); - } + std::vector&& affinity_masks) noexcept; constexpr std::size_t get_num_threads() const noexcept { @@ -69,19 +58,9 @@ namespace hpx::threads::policies::detail { std::size_t get_thread_occupancy( threads::topology const& topo, std::size_t pu_num) const; - std::size_t get_pu_num(std::size_t num_thread) const noexcept - { - HPX_ASSERT(num_thread < 
pu_nums_.size()); - return pu_nums_[num_thread]; - } - void set_pu_nums(std::vector const& pu_nums) - { - pu_nums_ = pu_nums; - } - void set_pu_nums(std::vector&& pu_nums) noexcept - { - pu_nums_ = HPX_MOVE(pu_nums); - } + std::size_t get_pu_num(std::size_t num_thread) const noexcept; + void set_pu_nums(std::vector const& pu_nums); + void set_pu_nums(std::vector&& pu_nums) noexcept; void add_punit(std::size_t virt_core, std::size_t thread_num); void init_cached_pu_nums(std::size_t hardware_concurrency); diff --git a/libs/core/affinity/src/affinity_data.cpp b/libs/core/affinity/src/affinity_data.cpp index 592c45ea3049..2c41ccc0c85c 100644 --- a/libs/core/affinity/src/affinity_data.cpp +++ b/libs/core/affinity/src/affinity_data.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2007-2023 Hartmut Kaiser +// Copyright (c) 2007-2024 Hartmut Kaiser // // SPDX-License-Identifier: BSL-1.0 // Distributed under the Boost Software License, Version 1.0. (See accompanying @@ -24,7 +24,7 @@ namespace hpx::threads::policies::detail { std::vector const& masks) noexcept { std::size_t count = 0; - for (mask_cref_type m : masks) + for (mask_cref_type const m : masks) { if (threads::any(m)) ++count; @@ -38,6 +38,7 @@ namespace hpx::threads::policies::detail { , pu_step_(1) , used_cores_(0) , affinity_domain_("pu") + , no_affinity_() , use_process_mask_(false) , num_pus_needed_(0) { @@ -122,7 +123,7 @@ namespace hpx::threads::policies::detail { } else if (pu_offset == static_cast(-1)) { - // calculate the pu offset based on the used cores, but only if its + // calculate the pu offset based on the used cores, but only if it's // not explicitly specified for (std::size_t num_core = 0; num_core != used_cores; ++num_core) { @@ -154,6 +155,23 @@ namespace hpx::threads::policies::detail { num_pus_needed_ = (std::max)(num_unique_cores, max_cores); } + void affinity_data::set_num_threads(size_t num_threads) noexcept + { + num_threads_ = num_threads; + } + + void affinity_data::set_affinity_masks( + 
std::vector const& affinity_masks) + { + affinity_masks_ = affinity_masks; + } + + void affinity_data::set_affinity_masks( + std::vector&& affinity_masks) noexcept + { + affinity_masks_ = HPX_MOVE(affinity_masks); + } + mask_type affinity_data::get_pu_mask( threads::topology const& topo, std::size_t global_thread_num) const { @@ -181,16 +199,15 @@ namespace hpx::threads::policies::detail { } if (0 == std::string("core").find(affinity_domain_)) { - // The affinity domain is 'core', return a bit mask corresponding - // to all processing units of the core containing the given - // pu_num. + // The affinity domain is 'core', return a bit mask corresponding to + // all processing units of the core containing the given pu_num. return topo.get_core_affinity_mask(pu_num); } if (0 == std::string("numa").find(affinity_domain_)) { - // The affinity domain is 'numa', return a bit mask corresponding - // to all processing units of the NUMA domain containing the - // given pu_num. + // The affinity domain is 'numa', return a bit mask corresponding to + // all processing units of the NUMA domain containing the given + // pu_num. 
return topo.get_numa_node_affinity_mask(pu_num); } @@ -219,7 +236,7 @@ namespace hpx::threads::policies::detail { for (std::size_t thread_num = 0; thread_num != num_threads_; ++thread_num) { - auto thread_mask = get_pu_mask(topo, thread_num); + auto const thread_mask = get_pu_mask(topo, thread_num); for (std::size_t i = 0; i != overall_threads; ++i) { if (threads::test(thread_mask, i)) @@ -251,7 +268,8 @@ namespace hpx::threads::policies::detail { for (std::size_t num_thread = 0; num_thread != num_threads_; ++num_thread) { - mask_cref_type affinity_mask = get_pu_mask(topo, num_thread); + mask_cref_type const affinity_mask = + get_pu_mask(topo, num_thread); if (threads::any(pu_mask & affinity_mask)) ++count; } @@ -259,6 +277,22 @@ namespace hpx::threads::policies::detail { return count; } + std::size_t affinity_data::get_pu_num(std::size_t num_thread) const noexcept + { + HPX_ASSERT(num_thread < pu_nums_.size()); + return pu_nums_[num_thread]; + } + + void affinity_data::set_pu_nums(std::vector const& pu_nums) + { + pu_nums_ = pu_nums; + } + + void affinity_data::set_pu_nums(std::vector&& pu_nums) noexcept + { + pu_nums_ = HPX_MOVE(pu_nums); + } + // means of adding a processing unit after initialization void affinity_data::add_punit(std::size_t virt_core, std::size_t thread_num) { @@ -309,17 +343,16 @@ namespace hpx::threads::policies::detail { // The distance between assigned processing units shouldn't be zero HPX_ASSERT(pu_step_ > 0 && pu_step_ <= hardware_concurrency); - // We 'scale' the thread number to compute the corresponding - // processing unit number. + // We 'scale' the thread number to compute the corresponding processing + // unit number. // - // The base line processing unit number is computed from the given + // The baseline processing unit number is computed from the given // pu-offset and pu-step. 
std::size_t const num_pu = pu_offset_ + pu_step_ * num_thread; - // We add an additional offset, which allows to 'roll over' if the - // pu number would get larger than the number of available - // processing units. Note that it does not make sense to 'roll over' - // farther than the given pu-step. + // We add an offset, which allows to 'roll over' if the pu number would + // get larger than the number of available processing units. Note that + // it does not make sense to 'roll over' farther than the given pu-step. std::size_t const offset = (num_pu / hardware_concurrency) % pu_step_; // The resulting pu number has to be smaller than the available diff --git a/libs/core/command_line_handling_local/CMakeLists.txt b/libs/core/command_line_handling_local/CMakeLists.txt index 51d744771110..2476a399aa21 100644 --- a/libs/core/command_line_handling_local/CMakeLists.txt +++ b/libs/core/command_line_handling_local/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2023 The STE||AR-Group +# Copyright (c) 2019-2024 The STE||AR-Group # # SPDX-License-Identifier: BSL-1.0 # Distributed under the Boost Software License, Version 1.0. (See accompanying @@ -9,17 +9,25 @@ cmake_minimum_required(VERSION 3.14 FATAL_ERROR) list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake") # Enable reading JSON formatted configuration files on the command line +set(HPX_COMMAND_LINE_HANDLING_WITH_JSON_CONFIGURATION_FILES_DEFAULT OFF) +if(HPX_COMMAND_LINE_HANDLING_WITH_JSON_CONFIGURATION_FILES) + set(HPX_COMMAND_LINE_HANDLING_WITH_JSON_CONFIGURATION_FILES_DEFAULT + ${HPX_COMMAND_LINE_HANDLING_WITH_JSON_CONFIGURATION_FILES} + ) +endif() + hpx_option( - HPX_COMMAND_LINE_HANDLING_WITH_JSON_CONFIGURATION_FILES + HPX_COMMAND_LINE_HANDLING_LOCAL_WITH_JSON_CONFIGURATION_FILES BOOL - "Enable reading JSON formatted configuration files on the command line. 
(default: OFF)" - OFF + "Enable reading JSON formatted configuration files on the command line.\n + (default: ${HPX_COMMAND_LINE_HANDLING_WITH_JSON_CONFIGURATION_FILES_DEFAULT})" + ${HPX_COMMAND_LINE_HANDLING_WITH_JSON_CONFIGURATION_FILES_DEFAULT} ADVANCED CATEGORY "Modules" MODULE COMMAND_LINE_HANDLING_LOCAL ) -if(HPX_COMMAND_LINE_HANDLING_WITH_JSON_CONFIGURATION_FILES) +if(HPX_COMMAND_LINE_HANDLING_LOCAL_WITH_JSON_CONFIGURATION_FILES) hpx_add_config_define_namespace( DEFINE HPX_COMMAND_LINE_HANDLING_HAVE_JSON_CONFIGURATION_FILES NAMESPACE COMMAND_LINE_HANDLING_LOCAL @@ -37,7 +45,7 @@ set(command_line_handling_local_sources parse_command_line_local.cpp ) -if(HPX_COMMAND_LINE_HANDLING_WITH_JSON_CONFIGURATION_FILES) +if(HPX_COMMAND_LINE_HANDLING_LOCAL_WITH_JSON_CONFIGURATION_FILES) include(HPX_SetupJSON) set(command_line_handling_local_dependencies Json::json) diff --git a/libs/core/command_line_handling_local/src/command_line_handling_local.cpp b/libs/core/command_line_handling_local/src/command_line_handling_local.cpp index fbffe1e7a3d6..2d24a3fda7b1 100644 --- a/libs/core/command_line_handling_local/src/command_line_handling_local.cpp +++ b/libs/core/command_line_handling_local/src/command_line_handling_local.cpp @@ -476,12 +476,16 @@ namespace hpx::local::detail { "larger than number of threads (--hpx:threads)"); } - if (!(queuing_ == "local-priority" || queuing_ == "abp-priority")) + if (!(queuing_ == "local-priority" || queuing_ == "abp-priority" || + queuing_.find("local-workrequesting") == 0)) { throw hpx::detail::command_line_error( "Invalid command line option --hpx:high-priority-threads, " - "valid for --hpx:queuing=local-priority and " - "--hpx:queuing=abp-priority only"); + "valid for --hpx:queuing=local-priority, " + "--hpx:queuing=local-workrequesting-fifo, " + "--hpx:queuing=local-workrequesting-lifo, " + "--hpx:queuing=local-workrequesting-mc, " + "and --hpx:queuing=abp-priority only"); }
ini_config.emplace_back("hpx.thread_queue.high_priority_queues!=" + diff --git a/libs/core/command_line_handling_local/src/parse_command_line_local.cpp b/libs/core/command_line_handling_local/src/parse_command_line_local.cpp index 6352b81c22e0..9ca8aa6b23f6 100644 --- a/libs/core/command_line_handling_local/src/parse_command_line_local.cpp +++ b/libs/core/command_line_handling_local/src/parse_command_line_local.cpp @@ -480,6 +480,7 @@ namespace hpx::local::detail { "--hpx:queuing=static, --hpx:queuing=static-priority, " "--hpx:queuing=local-workrequesting-fifo, " "--hpx:queuing=local-workrequesting-lifo, " + "--hpx:queuing=local-workrequesting-mc, " "and --hpx:queuing=local-priority only") ("hpx:pu-step", value(), "the step between used processing unit numbers for this " @@ -488,6 +489,7 @@ namespace hpx::local::detail { "--hpx:queuing=static, --hpx:queuing=static-priority " "--hpx:queuing=local-workrequesting-fifo, " "--hpx:queuing=local-workrequesting-lifo, " + "--hpx:queuing=local-workrequesting-mc, " "and --hpx:queuing=local-priority only") ("hpx:affinity", value(), "the affinity domain the OS threads will be confined to, " @@ -496,6 +498,7 @@ namespace hpx::local::detail { "--hpx:queuing=static, --hpx:queuing=static-priority " "--hpx:queuing=local-workrequesting-fifo, " "--hpx:queuing=local-workrequesting-lifo, " + "--hpx:queuing=local-workrequesting-mc, " " and --hpx:queuing=local-priority only") ("hpx:bind", value >()->composing(), "the detailed affinity description for the OS threads, see " @@ -515,21 +518,23 @@ namespace hpx::local::detail { "each processing unit") ("hpx:cores", value(), "the number of cores to utilize for this HPX " - "locality (default: 'all', i.e. the number of cores is based on " - "the number of total cores in the system)") + "locality (default: 'all', i.e. 
the number of cores is based " + "on the number of total cores in the system)") ("hpx:queuing", value(), "the queue scheduling policy to use, options are " "'local', 'local-priority-fifo','local-priority-lifo', " "'abp-priority-fifo', 'abp-priority-lifo', 'static', " - "'static-priority', 'local-workrequesting-fifo', and " - "'local-workrequesting-lifo' (default: 'local-priority'; " - "all option values can be abbreviated)") + "'static-priority', 'local-workrequesting-fifo'," + "'local-workrequesting-lifo', and 'local-workrequesting-mc' " + "(default: 'local-priority'; all option values can be " + "abbreviated)") ("hpx:high-priority-threads", value(), "the number of operating system threads maintaining a high " "priority queue (default: number of OS threads), valid for " "--hpx:queuing=local-priority,--hpx:queuing=static-priority, " "--hpx:queuing=local-workrequesting-fifo, " "--hpx:queuing=local-workrequesting-lifo, " + "--hpx:queuing=local-workrequesting-mc, " " and --hpx:queuing=abp-priority only)") ("hpx:numa-sensitive", value()->implicit_value(0), "makes the local-priority scheduler NUMA sensitive (" diff --git a/libs/core/command_line_handling_local/tests/unit/CMakeLists.txt b/libs/core/command_line_handling_local/tests/unit/CMakeLists.txt index 91426727ae8c..522b9ab9e8cf 100644 --- a/libs/core/command_line_handling_local/tests/unit/CMakeLists.txt +++ b/libs/core/command_line_handling_local/tests/unit/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023 The STE||AR-Group +# Copyright (c) 2020-2024 The STE||AR-Group # 2011 Bryce Adelstein-Lelbach # # SPDX-License-Identifier: BSL-1.0 @@ -7,7 +7,7 @@ set(tests) -if(HPX_COMMAND_LINE_HANDLING_WITH_JSON_CONFIGURATION_FILES) +if(HPX_COMMAND_LINE_HANDLING_LOCAL_WITH_JSON_CONFIGURATION_FILES) set(tests json_config_file) set(json_config_file_PARAMETERS diff --git a/libs/core/concurrency/include/hpx/concurrency/concurrentqueue.hpp b/libs/core/concurrency/include/hpx/concurrency/concurrentqueue.hpp index 
df2ca35cdf5c..43894dd5219e 100644 --- a/libs/core/concurrency/include/hpx/concurrency/concurrentqueue.hpp +++ b/libs/core/concurrency/include/hpx/concurrency/concurrentqueue.hpp @@ -91,13 +91,13 @@ #include // partly for __WINPTHREADS_VERSION if on MinGW-w64 w/ POSIX threading // Platform-specific definitions of a numeric thread ID type and an invalid value -namespace hpx { namespace concurrency { namespace details { +namespace hpx::concurrency::details { template struct thread_id_converter { typedef thread_id_t thread_id_numeric_size_t; typedef thread_id_t thread_id_hash_t; static thread_id_hash_t prehash(thread_id_t const& x) { return x; } }; -} } } +} #if defined(MCDBGQ_USE_RELACY) namespace hpx { namespace concurrency { namespace details { typedef std::uint32_t thread_id_t; @@ -109,15 +109,15 @@ namespace hpx { namespace concurrency { namespace details { // No sense pulling in windows.h in a header, we'll manually declare the function // we use and rely on backwards-compatibility for this not to break extern "C" __declspec(dllimport) unsigned long __stdcall GetCurrentThreadId(void); -namespace hpx { namespace concurrency { namespace details { +namespace hpx::concurrency::details { static_assert(sizeof(unsigned long) == sizeof(std::uint32_t), "Expected size of unsigned long to be 32 bits on Windows"); typedef std::uint32_t thread_id_t; - static const thread_id_t invalid_thread_id = 0; // See http://blogs.msdn.com/b/oldnewthing/archive/2004/02/23/78395.aspx - static const thread_id_t invalid_thread_id2 = 0xFFFFFFFFU; // Not technically guaranteed to be invalid, but is never used in practice. Note that all Win32 thread IDs are presently multiples of 4. + static constexpr thread_id_t invalid_thread_id = 0; // See http://blogs.msdn.com/b/oldnewthing/archive/2004/02/23/78395.aspx + static constexpr thread_id_t invalid_thread_id2 = 0xFFFFFFFFU; // Not technically guaranteed to be invalid, but is never used in practice. 
Note that all Win32 thread IDs are presently multiples of 4. static inline thread_id_t thread_id() { return static_cast(::GetCurrentThreadId()); } -} } } +} #elif defined(__arm__) || defined(_M_ARM) || defined(__aarch64__) || (defined(__APPLE__) && TARGET_OS_IPHONE) -namespace hpx { namespace concurrency { namespace details { +namespace hpx::concurrency::details { static_assert(sizeof(std::thread::id) == 4 || sizeof(std::thread::id) == 8, "std::thread::id is expected to be either 4 or 8 bytes"); typedef std::thread::id thread_id_t; @@ -149,7 +149,7 @@ namespace hpx { namespace concurrency { namespace details { #endif } }; -} } } +} #else // Use a nice trick from this answer: http://stackoverflow.com/a/8438730/21475 // In order to get a numeric thread ID in a platform-independent way, we use a thread-local @@ -162,12 +162,12 @@ namespace hpx { namespace concurrency { namespace details { // Assume C++11 compliant compiler #define MOODYCAMEL_THREADLOCAL thread_local #endif -namespace hpx { namespace concurrency { namespace details { +namespace hpx::concurrency::details { typedef std::uintptr_t thread_id_t; - static const thread_id_t invalid_thread_id = 0; // Address can't be nullptr - static const thread_id_t invalid_thread_id2 = 1; // Member accesses off a null pointer are also generally invalid. Plus it's not aligned. + static constexpr thread_id_t invalid_thread_id = 0; // Address can't be nullptr + static constexpr thread_id_t invalid_thread_id2 = 1; // Member accesses off a null pointer are also generally invalid. Plus it's not aligned. 
static inline thread_id_t thread_id() { static MOODYCAMEL_THREADLOCAL int x; return reinterpret_cast(&x); } -} } } +} #endif // Exceptions @@ -235,7 +235,7 @@ namespace hpx { namespace concurrency { namespace details { #endif // Compiler-specific likely/unlikely hints -namespace hpx { namespace concurrency { namespace details { +namespace hpx::concurrency::details { #if defined(__GNUC__) static inline bool (likely)(bool x) { return __builtin_expect((x), true); } static inline bool (unlikely)(bool x) { return __builtin_expect((x), false); } @@ -243,13 +243,13 @@ namespace hpx { namespace concurrency { namespace details { static inline bool (likely)(bool x) { return x; } static inline bool (unlikely)(bool x) { return x; } #endif -} } } +} #ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG #include "internal/concurrentqueue_internal_debug.h" #endif -namespace hpx { namespace concurrency { +namespace hpx::concurrency { namespace details { template struct const_numeric_max { @@ -3650,7 +3650,7 @@ inline void swap(typename ConcurrentQueue::ImplicitProducerKVP& a, ty a.swap(b); } -} } +} #if defined(__GNUC__) #pragma GCC diagnostic pop diff --git a/libs/core/config/include/hpx/config.hpp b/libs/core/config/include/hpx/config.hpp index 15e528f682bc..c3b3425d4bad 100644 --- a/libs/core/config/include/hpx/config.hpp +++ b/libs/core/config/include/hpx/config.hpp @@ -438,4 +438,8 @@ # define HPX_HAVE_MAX_CPU_COUNT 256 #endif +#if !defined(HPX_HAVE_MAX_CPU_COUNT) +#define HPX_HAVE_MAX_CPU_COUNT 64 +#endif + // clang-format on diff --git a/libs/core/resource_partitioner/include/hpx/resource_partitioner/detail/partitioner.hpp b/libs/core/resource_partitioner/include/hpx/resource_partitioner/detail/partitioner.hpp index 48ad39ddfa9b..ac91cdbd4b75 100644 --- a/libs/core/resource_partitioner/include/hpx/resource_partitioner/detail/partitioner.hpp +++ b/libs/core/resource_partitioner/include/hpx/resource_partitioner/detail/partitioner.hpp @@ -1,5 +1,5 @@ // Copyright (c) 2017 Shoshana Jakobovits 
-// Copyright (c) 2017-2022 Hartmut Kaiser +// Copyright (c) 2017-2024 Hartmut Kaiser // // SPDX-License-Identifier: BSL-1.0 // Distributed under the Boost Software License, Version 1.0. (See accompanying @@ -50,7 +50,7 @@ namespace hpx::resource::detail { // counter ... overall, in all the thread pools static std::size_t num_threads_overall; - init_pool_data(std::string const& name, scheduling_policy policy, + init_pool_data(std::string const& name, scheduling_policy sched, hpx::threads::policies::scheduler_mode mode, background_work_function func = background_work_function()); @@ -75,7 +75,7 @@ namespace hpx::resource::detail { hpx::threads::policies::scheduler_mode mode_; scheduler_function create_function_; - // possible additional beckground work to run on this scheduler + // possible additional background work to run on this scheduler background_work_function background_work_; }; @@ -86,6 +86,12 @@ namespace hpx::resource::detail { public: partitioner(); + + partitioner(partitioner const&) = delete; + partitioner(partitioner&&) = delete; + partitioner& operator=(partitioner const&) = delete; + partitioner& operator=(partitioner&&) = delete; + ~partitioner(); void print_init_pool_data(std::ostream&) const; @@ -203,7 +209,7 @@ namespace hpx::resource::detail { private: //////////////////////////////////////////////////////////////////////// void fill_topology_vectors(); - bool pu_exposed(std::size_t pid); + bool pu_exposed(std::size_t pu_num) const; //////////////////////////////////////////////////////////////////////// // called in hpx_init run_or_start @@ -232,7 +238,7 @@ namespace hpx::resource::detail { // counter for instance numbers static std::atomic instance_number_counter_; - // holds all of the command line switches + // holds all the command line switches util::section rtcfg_; std::size_t first_core_; std::size_t pus_needed_; diff --git a/libs/core/resource_partitioner/include/hpx/resource_partitioner/partitioner_fwd.hpp 
b/libs/core/resource_partitioner/include/hpx/resource_partitioner/partitioner_fwd.hpp index 379879be4b01..4af76f7a195c 100644 --- a/libs/core/resource_partitioner/include/hpx/resource_partitioner/partitioner_fwd.hpp +++ b/libs/core/resource_partitioner/include/hpx/resource_partitioner/partitioner_fwd.hpp @@ -107,6 +107,7 @@ namespace hpx::resource { shared_priority = 7, local_workrequesting_fifo = 8, local_workrequesting_lifo = 9, + local_workrequesting_mc = 10, }; #define HPX_SCHEDULING_POLICY_UNSCOPED_ENUM_DEPRECATION_MSG \ diff --git a/libs/core/resource_partitioner/src/detail_partitioner.cpp b/libs/core/resource_partitioner/src/detail_partitioner.cpp index 8dad2d6379ce..4ab48b3ffef1 100644 --- a/libs/core/resource_partitioner/src/detail_partitioner.cpp +++ b/libs/core/resource_partitioner/src/detail_partitioner.cpp @@ -1,5 +1,5 @@ // Copyright (c) 2017 Shoshana Jakobovits -// Copyright (c) 2017-2022 Hartmut Kaiser +// Copyright (c) 2017-2024 Hartmut Kaiser // // SPDX-License-Identifier: BSL-1.0 // Distributed under the Boost Software License, Version 1.0. (See accompanying @@ -88,7 +88,7 @@ namespace hpx::resource::detail { // mechanism for adding resources // num threads = number of threads desired on a PU. defaults to 1. 
- // note: if num_threads > 1 => oversubscription + // note: if num_threads > 1 => over-subscription void init_pool_data::add_resource( std::size_t pu_index, bool exclusive, std::size_t num_threads) { @@ -148,6 +148,9 @@ namespace hpx::resource::detail { case resource::scheduling_policy::local_workrequesting_lifo: sched = "local_workrequesting_lifo"; break; + case resource::scheduling_policy::local_workrequesting_mc: + sched = "local_workrequesting_mc"; + break; case resource::scheduling_policy::static_: sched = "static"; break; @@ -167,7 +170,7 @@ namespace hpx::resource::detail { os << "\"" << sched << "\" is running on PUs : \n"; - for (threads::mask_cref_type assigned_pu : assigned_pus_) + for (threads::mask_cref_type const assigned_pu : assigned_pus_) { os << hpx::threads::to_string(assigned_pu) << '\n'; } @@ -223,9 +226,8 @@ namespace hpx::resource::detail { //////////////////////////////////////////////////////////////////////// partitioner::partitioner() - : rtcfg_() - , first_core_(std::size_t(-1)) - , pus_needed_(std::size_t(-1)) + : first_core_(static_cast(-1)) + , pus_needed_(static_cast(-1)) , mode_(partitioner_mode::default_) , topo_(threads::create_topology()) , default_scheduler_mode_(threads::policies::scheduler_mode::default_) @@ -240,7 +242,7 @@ namespace hpx::resource::detail { #if defined(HPX_HAVE_MAX_CPU_COUNT) if (HPX_HAVE_MAX_CPU_COUNT < topo_.get_number_of_pus()) { - throw_runtime_error("partitioner::partioner", + throw_runtime_error("partitioner::partitioner", hpx::util::format( "Currently, HPX_HAVE_MAX_CPU_COUNT is set to {1} " "while your system has {2} processing units. 
Please " @@ -251,13 +253,14 @@ namespace hpx::resource::detail { } #endif - std::string default_scheduler_mode_str = + std::string const default_scheduler_mode_str = rtcfg_.get_entry("hpx.default_scheduler_mode", std::string()); if (!default_scheduler_mode_str.empty()) { - default_scheduler_mode_ = threads::policies::scheduler_mode( - hpx::util::from_string( - default_scheduler_mode_str)); + default_scheduler_mode_ = + static_cast( + hpx::util::from_string( + default_scheduler_mode_str)); HPX_ASSERT_MSG( (default_scheduler_mode_ & ~threads::policies::scheduler_mode::all_flags) == 0, @@ -276,22 +279,22 @@ namespace hpx::resource::detail { detail::init_pool_data::num_threads_overall = 0; } - bool partitioner::pu_exposed(std::size_t pu_num) + bool partitioner::pu_exposed(std::size_t pu_num) const { threads::mask_type pu_mask = threads::mask_type(); threads::resize( pu_mask, static_cast(threads::hardware_concurrency())); threads::set(pu_mask, pu_num); - threads::topology& topo = get_topology(); - threads::mask_type comp = + threads::topology const& topo = get_topology(); + threads::mask_type const comp = affinity_data_.get_used_pus_mask(topo, pu_num); return threads::any(comp & pu_mask); } void partitioner::fill_topology_vectors() { - threads::topology& topo = get_topology(); + threads::topology const& topo = get_topology(); std::size_t pid = 0; std::size_t num_numa_nodes = topo.get_number_of_numa_nodes(); @@ -305,7 +308,8 @@ namespace hpx::resource::detail { numa_domains_.emplace_back(i); // add a numa domain numa_domain& nd = numa_domains_.back(); // numa-domain just added - std::size_t numa_node_cores = topo.get_number_of_numa_node_cores(i); + std::size_t const numa_node_cores = + topo.get_number_of_numa_node_cores(i); nd.cores_.reserve(numa_node_cores); bool numa_domain_contains_exposed_cores = false; @@ -316,7 +320,7 @@ namespace hpx::resource::detail { nd.cores_.emplace_back(j, &nd); core& c = nd.cores_.back(); - std::size_t core_pus = 
topo.get_number_of_core_pus(j); + std::size_t const core_pus = topo.get_number_of_core_pus(j); c.pus_.reserve(core_pus); bool core_contains_exposed_pus = false; @@ -328,7 +332,7 @@ namespace hpx::resource::detail { { c.pus_.emplace_back(pid, &c, affinity_data_.get_thread_occupancy(topo, pid)); - pu& p = c.pus_.back(); + pu const& p = c.pus_.back(); if (p.thread_occupancy_ == 0) { @@ -367,10 +371,10 @@ namespace hpx::resource::detail { if (first_core_ != first_core) { std::size_t offset = first_core; - std::size_t num_pus_core = + std::size_t const num_pus_core = get_topology().get_number_of_core_pus(offset); - if (first_core_ != std::size_t(-1)) + if (first_core_ != static_cast(-1)) { offset -= first_core_; } @@ -392,10 +396,10 @@ namespace hpx::resource::detail { std::size_t partitioner::threads_needed() noexcept { - if (pus_needed_ == std::size_t(-1)) + if (pus_needed_ == static_cast(-1)) { pus_needed_ = affinity_data_.get_num_pus_needed(); - HPX_ASSERT(pus_needed_ != std::size_t(-1)); + HPX_ASSERT(pus_needed_ != static_cast(-1)); } return pus_needed_; } @@ -464,7 +468,7 @@ namespace hpx::resource::detail { // select the default scheduler scheduling_policy default_scheduler; - std::string default_scheduler_str = + std::string const default_scheduler_str = rtcfg_.get_entry("hpx.scheduler", std::string()); if (0 == std::string("local").find(default_scheduler_str)) @@ -493,6 +497,11 @@ namespace hpx::resource::detail { { default_scheduler = scheduling_policy::local_workrequesting_lifo; } + else if (0 == + std::string("local-workrequesting-mc").find(default_scheduler_str)) + { + default_scheduler = scheduling_policy::local_workrequesting_mc; + } else if (0 == std::string("static").find(default_scheduler_str)) { default_scheduler = scheduling_policy::static_; @@ -525,8 +534,8 @@ namespace hpx::resource::detail { // set this scheduler on the pools that do not have a specified scheduler yet std::lock_guard l(mtx_); - std::size_t npools = initial_thread_pools_.size(); - 
for (std::size_t i = 0; i != npools; ++i) + std::size_t const num_pools = initial_thread_pools_.size(); + for (std::size_t i = 0; i != num_pools; ++i) { if (initial_thread_pools_[i].scheduling_policy_ == scheduling_policy::unspecified) @@ -586,7 +595,7 @@ namespace hpx::resource::detail { // resources called in set_default_pool() bool partitioner::check_empty_pools() const { - std::size_t num_thread_pools = initial_thread_pools_.size(); + std::size_t const num_thread_pools = initial_thread_pools_.size(); for (std::size_t i = 0; i != num_thread_pools; i++) { @@ -594,7 +603,8 @@ namespace hpx::resource::detail { { return true; } - for (auto assigned_pus : initial_thread_pools_[i].assigned_pus_) + for (auto const assigned_pus : + initial_thread_pools_[i].assigned_pus_) { if (!threads::any(assigned_pus)) { @@ -629,7 +639,7 @@ namespace hpx::resource::detail { } //! if there already exists a pool with this name - std::size_t num_thread_pools = initial_thread_pools_.size(); + std::size_t const num_thread_pools = initial_thread_pools_.size(); for (std::size_t i = 1; i < num_thread_pools; i++) { if (pool_name == initial_thread_pools_[i].pool_name_) @@ -669,7 +679,7 @@ namespace hpx::resource::detail { } //! 
if there already exists a pool with this name - std::size_t num_thread_pools = initial_thread_pools_.size(); + std::size_t const num_thread_pools = initial_thread_pools_.size(); for (std::size_t i = 1; i != num_thread_pools; ++i) { if (pool_name == initial_thread_pools_[i].pool_name_) @@ -722,11 +732,11 @@ namespace hpx::resource::detail { // Make sure the total number of requested threads does not exceed // the number of threads requested on the command line - std::size_t num_threads = + std::size_t const num_os_threads = util::get_entry_as(rtcfg_, "hpx.os_threads", 0); - HPX_ASSERT(num_threads != 0); + HPX_ASSERT(num_os_threads != 0); - if (detail::init_pool_data::num_threads_overall > num_threads) + if (detail::init_pool_data::num_threads_overall > num_os_threads) { l.unlock(); throw std::runtime_error("partitioner::add_resource: " @@ -735,7 +745,7 @@ namespace hpx::resource::detail { detail::init_pool_data::num_threads_overall) + " threads requested by the resource partitioner, but " "only " + - std::to_string(num_threads) + + std::to_string(num_os_threads) + " provided on the command-line."); } } @@ -813,7 +823,7 @@ namespace hpx::resource::detail { std::unique_lock l(mtx_); // look up which scheduler is needed - scheduling_policy sched_type = + scheduling_policy const sched_type = get_pool_data(l, pool_name).scheduling_policy_; if (sched_type == scheduling_policy::unspecified) { @@ -836,7 +846,7 @@ namespace hpx::resource::detail { { std::unique_lock l(mtx_); - std::size_t num_thread_pools = initial_thread_pools_.size(); + std::size_t const num_thread_pools = initial_thread_pools_.size(); for (size_t i = 0; i != num_thread_pools; ++i) { num_threads += get_pool_data(l, i).num_threads_; @@ -848,7 +858,7 @@ namespace hpx::resource::detail { HPX_ASSERT(as_bool(mode_ & partitioner_mode::allow_oversubscription) || num_threads == util::get_entry_as( - rtcfg_, "hpx.os_threads", std::size_t(-1))); + rtcfg_, "hpx.os_threads", static_cast(-1))); return num_threads; } @@ 
-1038,7 +1048,7 @@ namespace hpx::resource::detail { "pool '{}' has no non-exclusive pus associated", pool_name); } - for (std::size_t pu_num : pu_nums_to_remove) + for (std::size_t const pu_num : pu_nums_to_remove) { remove_pu(pu_num); } @@ -1085,7 +1095,7 @@ namespace hpx::resource::detail { "pool '{}' has no non-exclusive pus associated", pool_name); } - for (std::size_t pu_num : pu_nums_to_add) + for (std::size_t const pu_num : pu_nums_to_add) { add_pu(pu_num); } @@ -1105,7 +1115,7 @@ namespace hpx::resource::detail { { std::lock_guard l(mtx_); - std::size_t num_pools = initial_thread_pools_.size(); + std::size_t const num_pools = initial_thread_pools_.size(); for (std::size_t i = 0; i < num_pools; i++) { if (initial_thread_pools_[i].pool_name_ == pool_name) @@ -1125,7 +1135,7 @@ namespace hpx::resource::detail { detail::init_pool_data const& partitioner::get_pool_data( std::unique_lock& l, std::string const& pool_name) const { - auto pool = std::find_if(initial_thread_pools_.begin(), + auto const pool = std::find_if(initial_thread_pools_.begin(), initial_thread_pools_.end(), [&pool_name](detail::init_pool_data const& itp) -> bool { return (itp.pool_name_ == pool_name); @@ -1145,7 +1155,7 @@ namespace hpx::resource::detail { detail::init_pool_data& partitioner::get_pool_data( std::unique_lock& l, std::string const& pool_name) { - auto pool = std::find_if(initial_thread_pools_.begin(), + auto const pool = std::find_if(initial_thread_pools_.begin(), initial_thread_pools_.end(), [&pool_name](detail::init_pool_data const& itp) -> bool { return (itp.pool_name_ == pool_name); @@ -1170,6 +1180,7 @@ namespace hpx::resource::detail { os << "the resource partitioner owns " << static_cast(initial_thread_pools_.size()) << " pool(s) : \n"; // -V128 + for (auto itp : initial_thread_pools_) { itp.print_pool(os); diff --git a/libs/core/resource_partitioner/tests/unit/cross_pool_injection.cpp b/libs/core/resource_partitioner/tests/unit/cross_pool_injection.cpp index 
a86e064014b4..eb76baf9cdea 100644 --- a/libs/core/resource_partitioner/tests/unit/cross_pool_injection.cpp +++ b/libs/core/resource_partitioner/tests/unit/cross_pool_injection.cpp @@ -288,6 +288,9 @@ int main(int argc, char* argv[]) hpx::resource::scheduling_policy::static_priority, // The shared_priority scheduler sometimes hangs in this test. //hpx::resource::scheduling_policy::shared_priority, + hpx::resource::scheduling_policy::local_workrequesting_fifo, + hpx::resource::scheduling_policy::local_workrequesting_lifo, + hpx::resource::scheduling_policy::local_workrequesting_mc, }; for (auto const scheduler : schedulers) diff --git a/libs/core/resource_partitioner/tests/unit/shutdown_suspended_pus.cpp b/libs/core/resource_partitioner/tests/unit/shutdown_suspended_pus.cpp index e111f0f2735b..9d59612680ac 100644 --- a/libs/core/resource_partitioner/tests/unit/shutdown_suspended_pus.cpp +++ b/libs/core/resource_partitioner/tests/unit/shutdown_suspended_pus.cpp @@ -90,6 +90,10 @@ int main(int argc, char* argv[]) // an assert in the scheduling_loop if a background thread is not // created. 
//hpx::resource::scheduling_policy::shared_priority, + + hpx::resource::scheduling_policy::local_workrequesting_fifo, + hpx::resource::scheduling_policy::local_workrequesting_lifo, + hpx::resource::scheduling_policy::local_workrequesting_mc, }; for (auto const scheduler : schedulers) diff --git a/libs/core/resource_partitioner/tests/unit/suspend_pool.cpp b/libs/core/resource_partitioner/tests/unit/suspend_pool.cpp index 9560a63671aa..dc949538fabd 100644 --- a/libs/core/resource_partitioner/tests/unit/suspend_pool.cpp +++ b/libs/core/resource_partitioner/tests/unit/suspend_pool.cpp @@ -192,6 +192,10 @@ int main(int argc, char* argv[]) hpx::resource::scheduling_policy::static_, hpx::resource::scheduling_policy::static_priority, hpx::resource::scheduling_policy::shared_priority, + + hpx::resource::scheduling_policy::local_workrequesting_fifo, + hpx::resource::scheduling_policy::local_workrequesting_lifo, + hpx::resource::scheduling_policy::local_workrequesting_mc, }; for (auto const scheduler : schedulers) diff --git a/libs/core/resource_partitioner/tests/unit/suspend_pool_external.cpp b/libs/core/resource_partitioner/tests/unit/suspend_pool_external.cpp index 39151f23ec72..29a1bc91d6f1 100644 --- a/libs/core/resource_partitioner/tests/unit/suspend_pool_external.cpp +++ b/libs/core/resource_partitioner/tests/unit/suspend_pool_external.cpp @@ -102,6 +102,10 @@ int main(int argc, char* argv[]) hpx::resource::scheduling_policy::static_, hpx::resource::scheduling_policy::static_priority, hpx::resource::scheduling_policy::shared_priority, + + hpx::resource::scheduling_policy::local_workrequesting_fifo, + hpx::resource::scheduling_policy::local_workrequesting_lifo, + hpx::resource::scheduling_policy::local_workrequesting_mc, }; for (auto const scheduler : schedulers) diff --git a/libs/core/resource_partitioner/tests/unit/suspend_runtime.cpp b/libs/core/resource_partitioner/tests/unit/suspend_runtime.cpp index a52aa344bb90..71e4b25a218b 100644 --- 
a/libs/core/resource_partitioner/tests/unit/suspend_runtime.cpp +++ b/libs/core/resource_partitioner/tests/unit/suspend_runtime.cpp @@ -71,6 +71,10 @@ int main(int argc, char* argv[]) hpx::resource::scheduling_policy::static_, hpx::resource::scheduling_policy::static_priority, hpx::resource::scheduling_policy::shared_priority, + + hpx::resource::scheduling_policy::local_workrequesting_fifo, + hpx::resource::scheduling_policy::local_workrequesting_lifo, + hpx::resource::scheduling_policy::local_workrequesting_mc, }; for (auto const scheduler : schedulers) diff --git a/libs/core/resource_partitioner/tests/unit/suspend_thread.cpp b/libs/core/resource_partitioner/tests/unit/suspend_thread.cpp index 107cbc64eb99..bd3ae82c369c 100644 --- a/libs/core/resource_partitioner/tests/unit/suspend_thread.cpp +++ b/libs/core/resource_partitioner/tests/unit/suspend_thread.cpp @@ -246,6 +246,10 @@ int main(int argc, char* argv[]) hpx::resource::scheduling_policy::abp_priority_lifo, #endif hpx::resource::scheduling_policy::shared_priority, + + hpx::resource::scheduling_policy::local_workrequesting_fifo, + hpx::resource::scheduling_policy::local_workrequesting_lifo, + hpx::resource::scheduling_policy::local_workrequesting_mc, }; for (auto const scheduler : schedulers) diff --git a/libs/core/resource_partitioner/tests/unit/suspend_thread_external.cpp b/libs/core/resource_partitioner/tests/unit/suspend_thread_external.cpp index 8881abe9be29..1c6f1f7ebcca 100644 --- a/libs/core/resource_partitioner/tests/unit/suspend_thread_external.cpp +++ b/libs/core/resource_partitioner/tests/unit/suspend_thread_external.cpp @@ -206,6 +206,10 @@ int main(int argc, char* argv[]) hpx::resource::scheduling_policy::static_, hpx::resource::scheduling_policy::static_priority, hpx::resource::scheduling_policy::shared_priority, + + hpx::resource::scheduling_policy::local_workrequesting_fifo, + hpx::resource::scheduling_policy::local_workrequesting_lifo, + 
hpx::resource::scheduling_policy::local_workrequesting_mc, }; for (auto const scheduler : schedulers) diff --git a/libs/core/resource_partitioner/tests/unit/suspend_thread_timed.cpp b/libs/core/resource_partitioner/tests/unit/suspend_thread_timed.cpp index e82a8e268b98..92b6b7ba31b5 100644 --- a/libs/core/resource_partitioner/tests/unit/suspend_thread_timed.cpp +++ b/libs/core/resource_partitioner/tests/unit/suspend_thread_timed.cpp @@ -147,6 +147,9 @@ int main(int argc, char* argv[]) hpx::resource::scheduling_policy::abp_priority_fifo, hpx::resource::scheduling_policy::abp_priority_lifo, #endif + hpx::resource::scheduling_policy::local_workrequesting_fifo, + hpx::resource::scheduling_policy::local_workrequesting_lifo, + hpx::resource::scheduling_policy::local_workrequesting_mc, }; for (auto const scheduler : schedulers) diff --git a/libs/core/schedulers/include/hpx/schedulers/local_priority_queue_scheduler.hpp b/libs/core/schedulers/include/hpx/schedulers/local_priority_queue_scheduler.hpp index fa46c396fc81..f2b42fe1fb84 100644 --- a/libs/core/schedulers/include/hpx/schedulers/local_priority_queue_scheduler.hpp +++ b/libs/core/schedulers/include/hpx/schedulers/local_priority_queue_scheduler.hpp @@ -60,7 +60,7 @@ namespace hpx::threads::policies { typename StagedQueuing = lockfree_fifo, typename TerminatedQueuing = default_local_priority_queue_scheduler_terminated_queue> - class HPX_CORE_EXPORT local_priority_queue_scheduler : public scheduler_base + class local_priority_queue_scheduler : public scheduler_base { public: using has_periodic_maintenance = std::false_type; diff --git a/libs/core/schedulers/include/hpx/schedulers/local_queue_scheduler.hpp b/libs/core/schedulers/include/hpx/schedulers/local_queue_scheduler.hpp index 27c8423a85c7..4f3b0c708179 100644 --- a/libs/core/schedulers/include/hpx/schedulers/local_queue_scheduler.hpp +++ b/libs/core/schedulers/include/hpx/schedulers/local_queue_scheduler.hpp @@ -53,7 +53,7 @@ namespace hpx::threads::policies { 
typename StagedQueuing = lockfree_fifo, typename TerminatedQueuing = default_local_queue_scheduler_terminated_queue> - class HPX_CORE_EXPORT local_queue_scheduler : public scheduler_base + class local_queue_scheduler : public scheduler_base { public: using has_periodic_maintenance = std::false_type; @@ -925,8 +925,8 @@ namespace hpx::threads::policies { detail::affinity_data const& affinity_data_; #if !defined(HPX_NATIVE_MIC) // we know that the MIC has one NUMA domain only - mask_type steals_in_numa_domain_; - mask_type steals_outside_numa_domain_; + mask_type steals_in_numa_domain_ = mask_type(); + mask_type steals_outside_numa_domain_ = mask_type(); #endif std::vector numa_domain_masks_; std::vector outside_numa_domain_masks_; diff --git a/libs/core/schedulers/include/hpx/schedulers/local_workrequesting_scheduler.hpp b/libs/core/schedulers/include/hpx/schedulers/local_workrequesting_scheduler.hpp index bd9909fb9472..a740cffde3a6 100644 --- a/libs/core/schedulers/include/hpx/schedulers/local_workrequesting_scheduler.hpp +++ b/libs/core/schedulers/include/hpx/schedulers/local_workrequesting_scheduler.hpp @@ -1,4 +1,4 @@ -// Copyright (c) 2007-2023 Hartmut Kaiser +// Copyright (c) 2007-2024 Hartmut Kaiser // // SPDX-License-Identifier: BSL-1.0 // Distributed under the Boost Software License, Version 1.0. (See accompanying @@ -99,26 +99,19 @@ namespace hpx::threads::policies { lockfree_fifo; #endif - /////////////////////////////////////////////////////////////////////////// - // The local_workrequesting_scheduler maintains exactly one queue of work - // items (threads) per OS thread, where this OS thread pulls its next work - // from. 
- template - class local_workrequesting_scheduler : public scheduler_base - { - public: - using has_periodic_maintenance = std::false_type; + namespace detail { - using thread_queue_type = thread_queue; + //////////////////////////////////////////////////////////////////////// + inline unsigned int random_seed() noexcept + { + static std::random_device rd; + return rd(); + } - struct init_parameter + //////////////////////////////////////////////////////////////////////// + struct workrequesting_init_parameter { - init_parameter(std::size_t num_queues, + workrequesting_init_parameter(std::size_t num_queues, detail::affinity_data const& affinity_data, std::size_t num_high_priority_queues = static_cast( -1), @@ -136,7 +129,7 @@ namespace hpx::threads::policies { { } - init_parameter(std::size_t num_queues, + workrequesting_init_parameter(std::size_t num_queues, detail::affinity_data const& affinity_data, char const* description) : num_queues_(num_queues) @@ -152,13 +145,10 @@ namespace hpx::threads::policies { detail::affinity_data const& affinity_data_; char const* description_; }; - using init_parameter_type = init_parameter; - private: - //////////////////////////////////////////////////////////////////////// - struct task_data + struct workrequesting_task_data { - explicit HPX_HOST_DEVICE_CONSTEXPR task_data( + explicit workrequesting_task_data( std::uint16_t num_thread = static_cast( -1)) noexcept : num_thread_(num_thread) @@ -167,15 +157,15 @@ namespace hpx::threads::policies { // core number this task data originated from std::uint16_t num_thread_; - hpx::detail::small_vector tasks_; + hpx::detail::small_vector tasks_; }; - //////////////////////////////////////////////////////////////////////// - using task_channel = lcos::local::channel_spsc; + using workrequesting_task_channel = + lcos::local::channel_spsc; //////////////////////////////////////////////////////////////////////// - struct steal_request + struct workrequesting_steal_request { enum class state 
: std::uint16_t { @@ -184,10 +174,11 @@ namespace hpx::threads::policies { failed = 4 }; - steal_request() = default; + workrequesting_steal_request() = default; - steal_request(std::size_t const num_thread, task_channel* channel, - mask_cref_type victims, bool idle, bool const stealhalf) + workrequesting_steal_request(std::size_t const num_thread, + workrequesting_task_channel* channel, mask_cref_type victims, + bool idle, bool const stealhalf) : channel_(channel) , victims_(victims) , num_thread_(static_cast(num_thread)) @@ -197,19 +188,45 @@ namespace hpx::threads::policies { { } - task_channel* channel_ = nullptr; - mask_type victims_; + workrequesting_task_channel* channel_ = nullptr; + mask_type victims_ = mask_type(); std::uint16_t num_thread_ = static_cast(-1); std::uint16_t attempt_ = 0; state state_ = state::failed; // true ? attempt steal-half : attempt steal-one - bool stealhalf_ = false; + bool stealhalf_ = true; }; + using workrequesting_steal_request_channel = + lcos::local::channel_mpsc; + } // namespace detail + + /////////////////////////////////////////////////////////////////////////// + // The local_workrequesting_scheduler maintains several queues of work + // items (threads) per OS thread, where this OS thread pulls its next work + // from. 
+ template + class local_workrequesting_scheduler final : public scheduler_base + { + public: + using has_periodic_maintenance = std::false_type; + using thread_queue_type = thread_queue; + using init_parameter_type = detail::workrequesting_init_parameter; + + private: //////////////////////////////////////////////////////////////////////// + using task_data = detail::workrequesting_task_data; + using task_channel = detail::workrequesting_task_channel; + + using steal_request = detail::workrequesting_steal_request; using steal_request_channel = - lcos::local::base_channel_mpsc; + detail::workrequesting_steal_request_channel; //////////////////////////////////////////////////////////////////////// struct scheduler_data @@ -258,7 +275,7 @@ namespace hpx::threads::policies { } // initial affinity mask for this core - mask_type victims_; + mask_type victims_ = mask_type(); // queues for threads scheduled on this core thread_queue_type* queue_ = nullptr; @@ -281,7 +298,7 @@ namespace hpx::threads::policies { // adaptive stealing std::uint16_t num_recent_steals_ = 0; std::uint16_t num_recent_tasks_executed_ = 0; - bool stealhalf_ = false; + bool stealhalf_ = true; #if defined(HPX_HAVE_WORKREQUESTING_LAST_VICTIM) // core number the last stolen tasks originated from @@ -296,12 +313,6 @@ namespace hpx::threads::policies { }; public: - static unsigned int random_seed() noexcept - { - static std::random_device rd; - return rd(); - } - explicit local_workrequesting_scheduler(init_parameter_type const& init, bool deferred_initialization = true) : scheduler_base(init.num_queues_, init.description_, @@ -310,7 +321,7 @@ namespace hpx::threads::policies { , data_(init.num_queues_) , low_priority_queue_(thread_queue_init_) , curr_queue_(0) - , gen_(random_seed()) + , gen_(detail::random_seed()) , affinity_data_(init.affinity_data_) , num_queues_(init.num_queues_) , num_high_priority_queues_(init.num_high_priority_queues_) @@ -341,7 +352,7 @@ namespace hpx::threads::policies { 
~local_workrequesting_scheduler() override = default; - static std::string_view get_scheduler_name() + static constexpr std::string_view get_scheduler_name() noexcept { return "local_workrequesting_scheduler"; } @@ -815,18 +826,21 @@ namespace hpx::threads::policies { return false; } - // decline_or_forward_all_steal_requests is only called when a worker + // decline_or_forward_one_steal_requests is only called when a worker // has nothing else to do but to relay steal requests, which means the // worker is idle. - void decline_or_forward_all_steal_requests(scheduler_data& d) noexcept + void decline_or_forward_one_steal_requests(scheduler_data& d) noexcept { - steal_request req; - while (try_receiving_steal_request(d, req)) + if (!d.requests_->is_empty()) { + steal_request req; + if (try_receiving_steal_request(d, req)) + { #if defined(HPX_HAVE_WORKREQUESTING_STEAL_STATISTICS) - ++d.steal_requests_received_; + ++d.steal_requests_received_; #endif - decline_or_forward_steal_request(d, req); + decline_or_forward_steal_request(d, req); + } } } @@ -863,16 +877,19 @@ namespace hpx::threads::policies { task_data thrds(d.num_thread_); thrds.tasks_.reserve(max_num_to_steal); +#ifdef HPX_HAVE_THREAD_STEALING_COUNTS thread_id_ref_type thrd; while (max_num_to_steal-- != 0 && d.queue_->get_next_thread(thrd, false, true)) { -#ifdef HPX_HAVE_THREAD_STEALING_COUNTS d.queue_->increment_num_stolen_from_pending(); -#endif thrds.tasks_.push_back(HPX_MOVE(thrd)); thrd = thread_id_ref_type{}; } +#else + d.queue_->get_next_threads( + thrds.tasks_.begin(), thrds.tasks_.size(), false, true); +#endif // we are ready to send at least one task if (!thrds.tasks_.empty()) @@ -902,54 +919,46 @@ namespace hpx::threads::policies { { HPX_ASSERT(num_thread < num_queues_); - auto& d = data_[num_thread].data_; - if (num_thread < num_high_priority_queues_) - { - bool result = d.high_priority_queue_->get_next_thread(thrd); - -#ifdef HPX_HAVE_THREAD_STEALING_COUNTS - 
d.high_priority_queue_->increment_num_pending_accesses(); - if (result) - { - ++d.num_recent_tasks_executed_; - return true; - } - d.high_priority_queue_->increment_num_pending_misses(); -#else - if (result) + auto const get_thread = [](thread_queue_type* this_queue, + thread_id_ref_type& thrd) { + bool result = false; + if (this_queue->get_pending_queue_length( + std::memory_order_relaxed) != 0) { - ++d.num_recent_tasks_executed_; - return true; - } + result = this_queue->get_next_thread(thrd); +#ifdef HPX_HAVE_THREAD_STEALING_COUNTS + this_queue->increment_num_pending_accesses(); + if (!result) + this_queue->increment_num_pending_misses(); #endif - } + } + return result; + }; - bool result = false; - for (thread_queue_type* this_queue : {d.bound_queue_, d.queue_}) - { - result = this_queue->get_next_thread(thrd); + auto& d = data_[num_thread].data_; -#ifdef HPX_HAVE_THREAD_STEALING_COUNTS - this_queue->increment_num_pending_accesses(); -#endif - if (result) - break; -#ifdef HPX_HAVE_THREAD_STEALING_COUNTS - this_queue->increment_num_pending_misses(); -#endif + if (num_thread < num_high_priority_queues_ && + get_thread(d.high_priority_queue_, thrd)) + { + ++d.num_recent_tasks_executed_; + return true; } - if (allow_stealing && result) + if (allow_stealing && + (get_thread(d.bound_queue_, thrd) || + get_thread(d.queue_, thrd))) { // We found a task to run, however before running it we handle // steal requests (assuming that there is more work left that // could be used to satisfy steal requests). 
- - steal_request req; - while (try_receiving_steal_request(d, req)) + if (!d.requests_->is_empty()) { - if (!handle_steal_request(d, req)) - break; + steal_request req; + while (try_receiving_steal_request(d, req)) + { + if (!handle_steal_request(d, req)) + break; + } } ++d.num_recent_tasks_executed_; @@ -964,7 +973,8 @@ namespace hpx::threads::policies { return false; } - if (low_priority_queue_.get_next_thread(thrd)) + if (num_thread == num_queues_ - 1 && + get_thread(&low_priority_queue_, thrd)) { ++d.num_recent_tasks_executed_; return true; @@ -1447,7 +1457,7 @@ namespace hpx::threads::policies { // return a random victim for the current stealing operation std::size_t random_victim(steal_request const& req) noexcept { - std::size_t result = 0; + std::size_t result; { // generate at most 3 random numbers before resorting to more @@ -1591,7 +1601,7 @@ namespace hpx::threads::policies { // Try receiving tasks that are sent by another core as a response to // one of our steal requests. This returns true if new tasks were // received. - bool try_receiving_tasks(scheduler_data& d, std::size_t& added, + static bool try_receiving_tasks(scheduler_data& d, std::size_t& added, thread_id_ref_type* next_thrd) { task_data thrds{}; @@ -1699,14 +1709,18 @@ namespace hpx::threads::policies { HPX_ASSERT(d.requested_ != 0); } - if (try_receiving_tasks(d, added, next_thrd)) + if (!d.tasks_->is_empty() && + try_receiving_tasks(d, added, next_thrd)) { return false; } // if we did not receive any new task, decline or forward all - // pending steal requests - decline_or_forward_all_steal_requests(d); + // pending steal requests, if there are any + if (HPX_UNLIKELY(!d.requests_->is_empty())) + { + decline_or_forward_one_steal_requests(d); + } #ifdef HPX_HAVE_THREAD_MINIMAL_DEADLOCK_DETECTION // no new work is available, are we deadlocked? 
diff --git a/libs/core/schedulers/include/hpx/schedulers/lockfree_queue_backends.hpp b/libs/core/schedulers/include/hpx/schedulers/lockfree_queue_backends.hpp index 65296f65cdd0..269b147745d6 100644 --- a/libs/core/schedulers/include/hpx/schedulers/lockfree_queue_backends.hpp +++ b/libs/core/schedulers/include/hpx/schedulers/lockfree_queue_backends.hpp @@ -50,6 +50,8 @@ namespace hpx::threads::policies { using rvalue_reference = T&&; using size_type = std::uint64_t; + static constexpr bool support_bulk_dequeue = false; + explicit lockfree_fifo_backend(size_type initial_size = 0, size_type /* num_thread */ = static_cast(-1)) : queue_(static_cast(initial_size)) @@ -114,6 +116,8 @@ namespace hpx::threads::policies { using rvalue_reference = T&&; using size_type = std::uint64_t; + static constexpr bool support_bulk_dequeue = true; + explicit moodycamel_fifo_backend(size_type initial_size = 0, size_type /* num_thread */ = static_cast(-1)) : queue_(static_cast(initial_size)) @@ -136,6 +140,14 @@ namespace hpx::threads::policies { return queue_.try_dequeue(val); } + + template + std::size_t pop_bulk(Iterator it, std::int64_t max_items, + bool /* steal */ = true) noexcept(std:: + is_nothrow_copy_constructible_v) + { + return queue_.try_dequeue_bulk(it, max_items); + } + bool empty() noexcept { return (queue_.size_approx() == 0); @@ -170,6 +182,8 @@ namespace hpx::threads::policies { using rvalue_reference = T&&; using size_type = std::uint64_t; + static constexpr bool support_bulk_dequeue = false; + explicit lockfree_lifo_backend(size_type initial_size = 0, size_type /* num_thread */ = static_cast(-1)) : queue_(static_cast(initial_size)) @@ -230,6 +244,8 @@ namespace hpx::threads::policies { using rvalue_reference = T&&; using size_type = std::uint64_t; + static constexpr bool support_bulk_dequeue = false; + explicit lockfree_abp_fifo_backend(size_type initial_size = 0, size_type /* num_thread */ = static_cast(-1)) : queue_(static_cast(initial_size)) @@ -287,6 
+303,8 @@ namespace hpx::threads::policies { using rvalue_reference = T&&; using size_type = std::uint64_t; + static constexpr bool support_bulk_dequeue = false; + explicit lockfree_abp_lifo_backend(size_type initial_size = 0, size_type /* num_thread */ = static_cast(-1)) : queue_(static_cast(initial_size)) diff --git a/libs/core/schedulers/include/hpx/schedulers/static_priority_queue_scheduler.hpp b/libs/core/schedulers/include/hpx/schedulers/static_priority_queue_scheduler.hpp index 57c03ff3af21..c4dcfda9d57b 100644 --- a/libs/core/schedulers/include/hpx/schedulers/static_priority_queue_scheduler.hpp +++ b/libs/core/schedulers/include/hpx/schedulers/static_priority_queue_scheduler.hpp @@ -33,7 +33,7 @@ namespace hpx::threads::policies { /////////////////////////////////////////////////////////////////////////// // The static_priority_queue_scheduler maintains exactly one queue of work // items (threads) per OS thread, where this OS thread pulls its next work - // from. Additionally it maintains separate queues: several for high + // from. Additionally, it maintains separate queues: several for high // priority threads and one for low priority threads. 
// // High priority threads are executed by the first N OS threads before any diff --git a/libs/core/schedulers/include/hpx/schedulers/thread_queue.hpp b/libs/core/schedulers/include/hpx/schedulers/thread_queue.hpp index 4c8524bad734..57e4521109eb 100644 --- a/libs/core/schedulers/include/hpx/schedulers/thread_queue.hpp +++ b/libs/core/schedulers/include/hpx/schedulers/thread_queue.hpp @@ -1,4 +1,4 @@ -// Copyright (c) 2007-2023 Hartmut Kaiser +// Copyright (c) 2007-2024 Hartmut Kaiser // Copyright (c) 2011 Bryce Lelbach // // SPDX-License-Identifier: BSL-1.0 @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include @@ -23,14 +22,20 @@ #include #include #include -#include +#if defined(HPX_HAVE_THREAD_MINIMAL_DEADLOCK_DETECTION) +#include +#endif #ifdef HPX_HAVE_THREAD_QUEUE_WAITTIME #include #include #endif #ifdef HPX_HAVE_THREAD_CREATION_AND_CLEANUP_RATES #include +#include +#endif +#ifdef HPX_HAVE_THREAD_STEALING_COUNTS +#include #endif #include @@ -89,7 +94,8 @@ namespace hpx::threads::policies { // we use a simple mutex to protect the data members for now using mutex_type = Mutex; - // this is the type of a map holding all threads (except depleted ones) + // this is the type of the map holding all threads (except depleted + // ones) using thread_map_type = std::unordered_set, std::equal_to<>, util::internal_allocator>; @@ -890,6 +896,82 @@ namespace hpx::threads::policies { return false; } + // Return up to max_items pending threads through the given output + // iterator, returning the number of threads actually retrieved (zero + // if none is available) + template + std::size_t get_next_threads(Iterator it, std::int64_t max_items, + bool allow_stealing = false, bool steal = false) + { + std::int64_t const work_items_count = (std::min)( + work_items_count_.data_.load(std::memory_order_relaxed), + max_items); + + if (work_items_count == 0) + { + return 0; + } + + if (allow_stealing && + parameters_.min_tasks_to_steal_pending_ > work_items_count) + { + return 0; + } + +#ifdef HPX_HAVE_THREAD_QUEUE_WAITTIME + 
std::size_t const max_items_requested = max_items; + + thread_description_ptr tdesc; + while (work_items_.pop(tdesc, steal)) + { + if (get_maintain_queue_wait_times_enabled()) + { + work_items_wait_ += + hpx::chrono::high_resolution_clock::now() - + tdesc->waittime; + ++work_items_wait_count_; + } + + *it++ = HPX_MOVE(tdesc->data); + delete tdesc; + + --max_items; + if (--work_items_count_.data_ == 0) + { + break; + } + } + + return max_items_requested - max_items; +#else + if constexpr (work_items_type::support_bulk_dequeue) + { + std::size_t const dequeued = + work_items_.pop_bulk(it, work_items_count, steal); + work_items_count_.data_ -= dequeued; + return dequeued; + } + else + { + std::size_t const max_items_requested = max_items; + + thread_description_ptr next_thrd; + while (max_items != 0 && work_items_.pop(next_thrd, steal)) + { + *it++ = threads::thread_id_ref_type(next_thrd, + threads::thread_id_addref::no); // do not addref! + + --max_items; + if (--work_items_count_.data_ == 0) + { + break; + } + } + + return max_items_requested - max_items; + } +#endif + } + // Schedule the passed thread void schedule_thread( threads::thread_id_ref_type thrd, bool other_end = false) diff --git a/libs/core/schedulers/include/hpx/schedulers/thread_queue_mc.hpp b/libs/core/schedulers/include/hpx/schedulers/thread_queue_mc.hpp index aa1c22ee1dde..e6b7c6887893 100644 --- a/libs/core/schedulers/include/hpx/schedulers/thread_queue_mc.hpp +++ b/libs/core/schedulers/include/hpx/schedulers/thread_queue_mc.hpp @@ -1,4 +1,4 @@ -// Copyright (c) 2007-2022 Hartmut Kaiser +// Copyright (c) 2007-2024 Hartmut Kaiser // Copyright (c) 2011 Bryce Lelbach // // SPDX-License-Identifier: BSL-1.0 @@ -11,20 +11,11 @@ #include #include #include -#include -#include #include -#include #include -#include #include -#include -#include #include #include -#include -#include -#include #ifdef HPX_HAVE_THREAD_CREATION_AND_CLEANUP_RATES #include @@ -36,13 +27,10 @@ #include #include #include -#include 
#include #include #include -#include #include -#include #if !defined(THREAD_QUEUE_MC_DEBUG) #if defined(HPX_DEBUG) @@ -71,8 +59,7 @@ namespace hpx::threads::policies { // we use a simple mutex to protect the data members for now using mutex_type = Mutex; - using thread_queue_type = thread_queue_mc; + using thread_queue_type = thread_queue_mc; using thread_heap_type = std::list>; @@ -84,9 +71,8 @@ namespace hpx::threads::policies { typename PendingQueuing::template apply::type; using task_items_type = - typename concurrentqueue_fifo::apply::type; + concurrentqueue_fifo::apply::type; - public: // ---------------------------------------------------------------- // Take thread init data from the new work queue and convert it into // full thread_data items that are added to the pending queue. @@ -138,7 +124,7 @@ namespace hpx::threads::policies { public: explicit thread_queue_mc(thread_queue_init_parameters const& parameters, - std::size_t queue_num = std::size_t(-1)) + std::size_t queue_num = static_cast(-1)) : parameters_(parameters) , queue_index_(static_cast(queue_num)) , holder_(nullptr) @@ -149,6 +135,11 @@ namespace hpx::threads::policies { work_items_count_.data_ = 0; } + thread_queue_mc(thread_queue_mc const&) = delete; + thread_queue_mc(thread_queue_mc&&) = delete; + thread_queue_mc& operator=(thread_queue_mc const&) = delete; + thread_queue_mc& operator=(thread_queue_mc&&) = delete; + // ---------------------------------------------------------------- void set_holder(queue_holder_thread* holder) { @@ -166,7 +157,7 @@ namespace hpx::threads::policies { // items) std::int64_t get_queue_length() const noexcept { - return std::int64_t(work_items_count_.data_.load( + return static_cast(work_items_count_.data_.load( std::memory_order_relaxed)) + new_tasks_count_.data_.load(std::memory_order_relaxed); } @@ -192,7 +183,6 @@ namespace hpx::threads::policies { { HPX_THROW_EXCEPTION(hpx::error::bad_parameter, "get_thread_count", "use 
get_queue_length_staged/get_queue_length_pending"); - return 0; } // create a new thread and schedule it if the initial state is equal to @@ -267,7 +257,7 @@ namespace hpx::threads::policies { bool get_next_thread(threads::thread_id_ref_type& thrd, bool other_end, bool check_new = false) HPX_HOT { - std::int64_t work_items_count_count = + std::int64_t const work_items_count_count = work_items_count_.data_.load(std::memory_order_relaxed); if (0 != work_items_count_count && work_items_.pop(thrd, other_end)) @@ -308,10 +298,10 @@ namespace hpx::threads::policies { } /////////////////////////////////////////////////////////////////////// - constexpr void on_start_thread(std::size_t /* num_thread */) noexcept {} - constexpr void on_stop_thread(std::size_t /* num_thread */) noexcept {} - constexpr void on_error(std::size_t /* num_thread */, - std::exception_ptr const& /* e */) noexcept + static constexpr void on_start_thread(std::size_t) noexcept {} + static constexpr void on_stop_thread(std::size_t) noexcept {} + static constexpr void on_error( + std::size_t, std::exception_ptr const&) noexcept { } diff --git a/libs/core/synchronization/include/hpx/synchronization/channel_mpmc.hpp b/libs/core/synchronization/include/hpx/synchronization/channel_mpmc.hpp index 7e5f15d833e8..4ce592e00eea 100644 --- a/libs/core/synchronization/include/hpx/synchronization/channel_mpmc.hpp +++ b/libs/core/synchronization/include/hpx/synchronization/channel_mpmc.hpp @@ -1,4 +1,4 @@ -// Copyright (c) 2019-2023 Hartmut Kaiser +// Copyright (c) 2019-2024 Hartmut Kaiser // // SPDX-License-Identifier: BSL-1.0 // Distributed under the Boost Software License, Version 1.0. 
(See accompanying @@ -14,7 +14,6 @@ #include #include #include -#include #include #include @@ -34,7 +33,7 @@ namespace hpx::lcos::local { private: using mutex_type = Mutex; - constexpr bool is_full(std::size_t tail) const noexcept + [[nodiscard]] constexpr bool is_full(std::size_t tail) const noexcept { std::size_t const numitems = size_ + tail - head_.data_; if (numitems < size_) @@ -44,7 +43,7 @@ namespace hpx::lcos::local { return numitems - size_ == size_ - 1; } - constexpr bool is_empty(std::size_t head) const noexcept + [[nodiscard]] constexpr bool is_empty(std::size_t head) const noexcept { return head == tail_.data_; } @@ -95,6 +94,16 @@ namespace hpx::lcos::local { } } + [[nodiscard]] bool is_empty() const noexcept + { + std::unique_lock l(mtx_.data_); + if (closed_) + { + return true; + } + return is_empty(head_.data_); + } + bool get(T* val = nullptr) const noexcept { std::unique_lock l(mtx_.data_); @@ -160,7 +169,7 @@ namespace hpx::lcos::local { return close(l); } - constexpr std::size_t capacity() const noexcept + [[nodiscard]] constexpr std::size_t capacity() const noexcept { return size_ - 1; } diff --git a/libs/core/synchronization/include/hpx/synchronization/channel_mpsc.hpp b/libs/core/synchronization/include/hpx/synchronization/channel_mpsc.hpp index 6e7b880b75b0..c0ecbaa42d7a 100644 --- a/libs/core/synchronization/include/hpx/synchronization/channel_mpsc.hpp +++ b/libs/core/synchronization/include/hpx/synchronization/channel_mpsc.hpp @@ -1,4 +1,4 @@ -// Copyright (c) 2019-2023 Hartmut Kaiser +// Copyright (c) 2019-2024 Hartmut Kaiser // // SPDX-License-Identifier: BSL-1.0 // Distributed under the Boost Software License, Version 1.0. 
(See accompanying @@ -15,7 +15,6 @@ #include #include #include -#include #include #include @@ -37,7 +36,7 @@ namespace hpx::lcos::local { private: using mutex_type = Mutex; - bool is_full(std::size_t tail) const noexcept + [[nodiscard]] bool is_full(std::size_t tail) const noexcept { std::size_t const numitems = size_ + tail - head_.data_.load(std::memory_order_acquire); @@ -49,7 +48,7 @@ namespace hpx::lcos::local { return (numitems - size_ == size_ - 1); } - bool is_empty(std::size_t head) const noexcept + [[nodiscard]] bool is_empty(std::size_t head) const noexcept { return head == tail_.data_.tail_.load(std::memory_order_relaxed); } @@ -110,6 +109,15 @@ namespace hpx::lcos::local { } } + [[nodiscard]] bool is_empty() const noexcept + { + if (closed_.load(std::memory_order_relaxed)) + { + return true; + } + return is_empty(head_.data_.load(std::memory_order_relaxed)); + } + bool get(T* val = nullptr) const noexcept { if (closed_.load(std::memory_order_relaxed)) @@ -182,7 +190,7 @@ namespace hpx::lcos::local { return 0; } - constexpr std::size_t capacity() const noexcept + [[nodiscard]] constexpr std::size_t capacity() const noexcept { return size_ - 1; } @@ -215,7 +223,7 @@ namespace hpx::lcos::local { private: using mutex_type = Mutex; - bool is_full(std::size_t tail) const noexcept + [[nodiscard]] bool is_full(std::size_t tail) const noexcept { std::size_t const numitems = size_ + tail - head_.data_.load(std::memory_order_acquire); @@ -227,7 +235,7 @@ namespace hpx::lcos::local { return (numitems - size_ == size_ - 1); } - bool is_empty(std::size_t head) const noexcept + [[nodiscard]] bool is_empty(std::size_t head) const noexcept { return head == tail_.data_.tail_.load(std::memory_order_relaxed); } @@ -243,6 +251,9 @@ namespace hpx::lcos::local { tail_.data_.tail_.store(0, std::memory_order_relaxed); } + base_channel_mpsc(base_channel_mpsc const& rhs) = delete; + base_channel_mpsc& operator=(base_channel_mpsc const& rhs) = delete; + 
base_channel_mpsc(base_channel_mpsc&& rhs) noexcept : size_(rhs.size_) , buffer_(HPX_MOVE(rhs.buffer_)) @@ -270,6 +281,11 @@ namespace hpx::lcos::local { ~base_channel_mpsc() = default; + [[nodiscard]] bool is_empty() const noexcept + { + return is_empty(head_.data_.load(std::memory_order_relaxed)); + } + bool get(T* val = nullptr) const noexcept { std::size_t head = head_.data_.load(std::memory_order_relaxed); @@ -320,7 +336,7 @@ namespace hpx::lcos::local { return true; } - constexpr std::size_t capacity() const noexcept + [[nodiscard]] constexpr std::size_t capacity() const noexcept { return size_ - 1; } diff --git a/libs/core/synchronization/include/hpx/synchronization/channel_spsc.hpp b/libs/core/synchronization/include/hpx/synchronization/channel_spsc.hpp index ef5d2a897f83..44b629a80714 100644 --- a/libs/core/synchronization/include/hpx/synchronization/channel_spsc.hpp +++ b/libs/core/synchronization/include/hpx/synchronization/channel_spsc.hpp @@ -1,4 +1,4 @@ -// Copyright (c) 2019-2023 Hartmut Kaiser +// Copyright (c) 2019-2024 Hartmut Kaiser // // SPDX-License-Identifier: BSL-1.0 // Distributed under the Boost Software License, Version 1.0. 
(See accompanying @@ -12,7 +12,6 @@ #include #include #include -#include #include #include @@ -37,7 +36,8 @@ namespace hpx::lcos::local { class channel_spsc { private: - HPX_FORCEINLINE bool is_full(std::size_t tail) const noexcept + [[nodiscard]] HPX_FORCEINLINE bool is_full( + std::size_t tail) const noexcept { std::size_t const numitems = size_ + tail - head_.data_.load(std::memory_order_acquire); @@ -49,7 +49,8 @@ namespace hpx::lcos::local { return (numitems - size_ == size_ - 1); } - HPX_FORCEINLINE bool is_empty(std::size_t head) const noexcept + [[nodiscard]] HPX_FORCEINLINE bool is_empty( + std::size_t head) const noexcept { return head == tail_.data_.load(std::memory_order_acquire); } @@ -108,6 +109,15 @@ namespace hpx::lcos::local { } } + [[nodiscard]] bool is_empty() const noexcept + { + if (closed_.load(std::memory_order_relaxed)) + { + return true; + } + return is_empty(head_.data_.load(std::memory_order_relaxed)); + } + bool get(T* val = nullptr) const noexcept { if (closed_.load(std::memory_order_relaxed)) @@ -173,7 +183,7 @@ namespace hpx::lcos::local { return 0; } - constexpr std::size_t capacity() const noexcept + [[nodiscard]] constexpr std::size_t capacity() const noexcept { return size_ - 1; } @@ -200,7 +210,8 @@ namespace hpx::lcos::local { class channel_spsc { private: - HPX_FORCEINLINE bool is_full(std::size_t tail) const noexcept + [[nodiscard]] HPX_FORCEINLINE bool is_full( + std::size_t tail) const noexcept { std::size_t const numitems = size_ + tail - head_.data_.load(std::memory_order_acquire); @@ -212,7 +223,8 @@ namespace hpx::lcos::local { return (numitems - size_ == size_ - 1); } - HPX_FORCEINLINE bool is_empty(std::size_t head) const noexcept + [[nodiscard]] HPX_FORCEINLINE bool is_empty( + std::size_t head) const noexcept { return head == tail_.data_.load(std::memory_order_acquire); } @@ -228,6 +240,9 @@ namespace hpx::lcos::local { tail_.data_.store(0, std::memory_order_relaxed); } + channel_spsc(channel_spsc const& rhs) = 
delete; + channel_spsc& operator=(channel_spsc const& rhs) = delete; + channel_spsc(channel_spsc&& rhs) noexcept : size_(rhs.size_) , buffer_(HPX_MOVE(rhs.buffer_)) @@ -253,6 +268,11 @@ namespace hpx::lcos::local { ~channel_spsc() = default; + [[nodiscard]] bool is_empty() const noexcept + { + return is_empty(head_.data_.load(std::memory_order_relaxed)); + } + bool get(T* val = nullptr) const noexcept { std::size_t head = head_.data_.load(std::memory_order_relaxed); @@ -296,7 +316,7 @@ namespace hpx::lcos::local { return true; } - constexpr std::size_t capacity() const noexcept + [[nodiscard]] constexpr std::size_t capacity() const noexcept { return size_ - 1; } diff --git a/libs/core/thread_pools/include/hpx/thread_pools/detail/background_thread.hpp b/libs/core/thread_pools/include/hpx/thread_pools/detail/background_thread.hpp index 17390400f8be..77415f97acd7 100644 --- a/libs/core/thread_pools/include/hpx/thread_pools/detail/background_thread.hpp +++ b/libs/core/thread_pools/include/hpx/thread_pools/detail/background_thread.hpp @@ -1,4 +1,4 @@ -// Copyright (c) 2023 Hartmut Kaiser +// Copyright (c) 2023-2024 Hartmut Kaiser // // SPDX-License-Identifier: BSL-1.0 // Distributed under the Boost Software License, Version 1.0. (See accompanying @@ -7,7 +7,6 @@ #pragma once #include -#include #include #include #include diff --git a/libs/core/thread_pools/include/hpx/thread_pools/scheduled_thread_pool.hpp b/libs/core/thread_pools/include/hpx/thread_pools/scheduled_thread_pool.hpp index 22fa098195b9..b12a27d63caa 100644 --- a/libs/core/thread_pools/include/hpx/thread_pools/scheduled_thread_pool.hpp +++ b/libs/core/thread_pools/include/hpx/thread_pools/scheduled_thread_pool.hpp @@ -1,5 +1,5 @@ // Copyright (c) 2017 Shoshana Jakobovits -// Copyright (c) 2007-2022 Hartmut Kaiser +// Copyright (c) 2007-2024 Hartmut Kaiser // // SPDX-License-Identifier: BSL-1.0 // Distributed under the Boost Software License, Version 1.0. 
(See accompanying @@ -42,12 +42,18 @@ namespace hpx::threads::detail { /////////////////////////////////////////////////////////////////////////// template - class scheduled_thread_pool : public hpx::threads::thread_pool_base + class scheduled_thread_pool final : public hpx::threads::thread_pool_base { public: /////////////////////////////////////////////////////////////////// scheduled_thread_pool(std::unique_ptr sched, thread_pool_init_parameters const& init); + + scheduled_thread_pool(scheduled_thread_pool const&) = delete; + scheduled_thread_pool(scheduled_thread_pool&&) = delete; + scheduled_thread_pool& operator=(scheduled_thread_pool const&) = delete; + scheduled_thread_pool& operator=(scheduled_thread_pool&&) = delete; + virtual ~scheduled_thread_pool(); void print_pool(std::ostream& os) const override; @@ -153,7 +159,7 @@ namespace hpx::threads::detail { std::thread& get_os_thread_handle( std::size_t global_thread_num) override { - std::size_t num_thread_local = + std::size_t const num_thread_local = global_thread_num - this->thread_offset_; HPX_ASSERT(num_thread_local < threads_.size()); return threads_[num_thread_local]; diff --git a/libs/core/thread_pools/include/hpx/thread_pools/scheduled_thread_pool_impl.hpp b/libs/core/thread_pools/include/hpx/thread_pools/scheduled_thread_pool_impl.hpp index 72c30824fe08..314862ee9b8e 100644 --- a/libs/core/thread_pools/include/hpx/thread_pools/scheduled_thread_pool_impl.hpp +++ b/libs/core/thread_pools/include/hpx/thread_pools/scheduled_thread_pool_impl.hpp @@ -1,5 +1,5 @@ // Copyright (c) 2017 Shoshana Jakobovits -// Copyright (c) 2007-2022 Hartmut Kaiser +// Copyright (c) 2007-2024 Hartmut Kaiser // // SPDX-License-Identifier: BSL-1.0 // Distributed under the Boost Software License, Version 1.0. 
(See accompanying @@ -13,11 +13,11 @@ #include #include #include +#include #include #include #include #include -#include #include #include #include @@ -42,7 +42,6 @@ #include #include #include -#include #include #include #include @@ -52,21 +51,6 @@ namespace hpx::threads::detail { - /////////////////////////////////////////////////////////////////////////// - struct manage_active_thread_count - { - explicit manage_active_thread_count(std::atomic& counter) noexcept - : counter_(counter) - { - } - ~manage_active_thread_count() - { - --counter_; - } - - std::atomic& counter_; - }; - /////////////////////////////////////////////////////////////////////////// template struct init_tss_helper @@ -324,8 +308,7 @@ namespace hpx::threads::detail { // run threads and wait for initialization to complete std::size_t thread_num = 0; - std::shared_ptr startup = - std::make_shared(pool_threads + 1); + auto const startup = std::make_shared(pool_threads + 1); try { topology const& topo = create_topology(); @@ -334,13 +317,13 @@ namespace hpx::threads::detail { { std::size_t global_thread_num = this->thread_offset_ + thread_num; - threads::mask_cref_type mask = + threads::mask_cref_type const mask = affinity_data_.get_pu_mask(topo, global_thread_num); // thread_num ordering: 1. threads of default pool // 2. threads of first special pool // 3. etc. - // get_pu_mask expects index according to ordering of masks + // get_pu_mask expects index according to the ordering of masks // in affinity_data::affinity_masks_ // which is in order of occupied PU LTM_(info).format("run: {} create OS thread {}: will run " @@ -453,7 +436,7 @@ namespace hpx::threads::detail { topology const& topo = create_topology(); // Set the affinity for the current thread. 
- threads::mask_cref_type mask = + threads::mask_cref_type const mask = affinity_data_.get_pu_mask(topo, global_thread_num); if (LHPX_ENABLED(debug)) @@ -500,11 +483,11 @@ namespace hpx::threads::detail { // set state to running std::atomic& state = sched_->Scheduler::get_state(thread_num); - hpx::state oldstate = state.exchange(hpx::state::running); + [[maybe_unused]] hpx::state const oldstate = + state.exchange(hpx::state::running); HPX_ASSERT(oldstate <= hpx::state::running); - HPX_UNUSED(oldstate); - // wait for all threads to start up before before starting HPX work + // wait for all threads to start up before starting HPX work startup->wait(); LTM_(info).format( @@ -514,11 +497,12 @@ namespace hpx::threads::detail { { try { - manage_active_thread_count count(thread_count_); + auto on_exit = + hpx::experimental::scope_exit([this] { --thread_count_; }); // run the work queue - hpx::threads::coroutines::prepare_main_thread main_thread; - HPX_UNUSED(main_thread); + [[maybe_unused]] hpx::threads::coroutines:: + prepare_main_thread const main_thread; // run main Scheduler loop until terminated scheduling_counter_data& counter_data = @@ -737,8 +721,8 @@ namespace hpx::threads::detail { std::int64_t scheduled_thread_pool::get_executed_threads( std::size_t num, bool reset) { - std::int64_t executed_threads = 0; - std::int64_t reset_executed_threads = 0; + std::int64_t executed_threads; + std::int64_t reset_executed_threads; if (num != static_cast(-1)) { @@ -775,13 +759,13 @@ namespace hpx::threads::detail { template std::int64_t scheduled_thread_pool::get_executed_threads() const { - std::int64_t executed_threads = + std::int64_t const executed_threads = accumulate_projected(counter_data_.begin(), counter_data_.end(), static_cast(0), &scheduling_counter_data::executed_threads_); #if defined(HPX_HAVE_THREAD_CUMULATIVE_COUNTS) - std::int64_t reset_executed_threads = + std::int64_t const reset_executed_threads = accumulate_projected(counter_data_.begin(), 
counter_data_.end(), static_cast(0), &scheduling_counter_data::reset_executed_threads_); @@ -798,8 +782,8 @@ namespace hpx::threads::detail { std::int64_t scheduled_thread_pool::get_executed_thread_phases( std::size_t num, bool reset) { - std::int64_t executed_phases = 0; - std::int64_t reset_executed_phases = 0; + std::int64_t executed_phases; + std::int64_t reset_executed_phases; if (num != static_cast(-1)) { @@ -839,10 +823,10 @@ namespace hpx::threads::detail { std::int64_t scheduled_thread_pool::get_thread_phase_duration( std::size_t num, bool reset) { - std::int64_t exec_total = 0; - std::int64_t num_phases = 0; - std::int64_t reset_exec_total = 0; - std::int64_t reset_num_phases = 0; + std::int64_t exec_total; + std::int64_t num_phases; + std::int64_t reset_exec_total; + std::int64_t reset_num_phases; if (num != static_cast(-1)) { @@ -897,17 +881,18 @@ namespace hpx::threads::detail { num_phases -= reset_num_phases; return static_cast( - (double(exec_total) * timestamp_scale_) / double(num_phases)); + (static_cast(exec_total) * timestamp_scale_) / + static_cast(num_phases)); } template std::int64_t scheduled_thread_pool::get_thread_duration( std::size_t num, bool reset) { - std::int64_t exec_total = 0; - std::int64_t num_threads = 0; - std::int64_t reset_exec_total = 0; - std::int64_t reset_num_threads = 0; + std::int64_t exec_total; + std::int64_t num_threads; + std::int64_t reset_exec_total; + std::int64_t reset_num_threads; if (num != static_cast(-1)) { @@ -959,20 +944,21 @@ namespace hpx::threads::detail { num_threads -= reset_num_threads; return static_cast( - (double(exec_total) * timestamp_scale_) / double(num_threads)); + (static_cast(exec_total) * timestamp_scale_) / + static_cast(num_threads)); } template std::int64_t scheduled_thread_pool::get_thread_phase_overhead( std::size_t num, bool reset) { - std::int64_t exec_total = 0; - std::int64_t tfunc_total = 0; - std::int64_t num_phases = 0; + std::int64_t exec_total; + std::int64_t tfunc_total; + 
std::int64_t num_phases; - std::int64_t reset_exec_total = 0; - std::int64_t reset_tfunc_total = 0; - std::int64_t reset_num_phases = 0; + std::int64_t reset_exec_total; + std::int64_t reset_tfunc_total; + std::int64_t reset_num_phases; if (num != static_cast(-1)) { @@ -1051,21 +1037,21 @@ namespace hpx::threads::detail { HPX_ASSERT(tfunc_total >= exec_total); return static_cast( - double((tfunc_total - exec_total) * timestamp_scale_) / - double(num_phases)); + static_cast(tfunc_total - exec_total) * timestamp_scale_ / + static_cast(num_phases)); } template std::int64_t scheduled_thread_pool::get_thread_overhead( std::size_t num, bool reset) { - std::int64_t exec_total = 0; - std::int64_t tfunc_total = 0; - std::int64_t num_threads = 0; + std::int64_t exec_total; + std::int64_t tfunc_total; + std::int64_t num_threads; - std::int64_t reset_exec_total = 0; - std::int64_t reset_tfunc_total = 0; - std::int64_t reset_num_threads = 0; + std::int64_t reset_exec_total; + std::int64_t reset_tfunc_total; + std::int64_t reset_num_threads; if (num != static_cast(-1)) { @@ -1140,8 +1126,8 @@ namespace hpx::threads::detail { HPX_ASSERT(tfunc_total >= exec_total); return static_cast( - double((tfunc_total - exec_total) * timestamp_scale_) / - double(num_threads)); + static_cast(tfunc_total - exec_total) * timestamp_scale_ / + static_cast(num_threads)); } template @@ -1149,8 +1135,8 @@ namespace hpx::threads::detail { scheduled_thread_pool::get_cumulative_thread_duration( std::size_t num, bool reset) { - std::int64_t exec_total = 0; - std::int64_t reset_exec_total = 0; + std::int64_t exec_total; + std::int64_t reset_exec_total; if (num != static_cast(-1)) { @@ -1187,7 +1173,8 @@ namespace hpx::threads::detail { exec_total -= reset_exec_total; - return static_cast(double(exec_total) * timestamp_scale_); + return static_cast( + static_cast(exec_total) * timestamp_scale_); } template @@ -1195,10 +1182,10 @@ namespace hpx::threads::detail { 
scheduled_thread_pool::get_cumulative_thread_overhead( std::size_t num, bool reset) { - std::int64_t exec_total = 0; - std::int64_t reset_exec_total = 0; - std::int64_t tfunc_total = 0; - std::int64_t reset_tfunc_total = 0; + std::int64_t exec_total; + std::int64_t reset_exec_total; + std::int64_t tfunc_total; + std::int64_t reset_tfunc_total; if (num != static_cast(-1)) { @@ -1256,8 +1243,9 @@ namespace hpx::threads::detail { exec_total -= reset_exec_total; tfunc_total -= reset_tfunc_total; - return static_cast( - (double(tfunc_total) - double(exec_total)) * timestamp_scale_); + return static_cast((static_cast(tfunc_total) - + static_cast(exec_total)) * + timestamp_scale_); } #endif // HPX_HAVE_THREAD_IDLE_RATES #endif // HPX_HAVE_THREAD_CUMULATIVE_COUNTS @@ -1269,10 +1257,10 @@ namespace hpx::threads::detail { std::int64_t scheduled_thread_pool::get_background_overhead( std::size_t num, bool reset) { - std::int64_t bg_total = 0; - std::int64_t reset_bg_total = 0; - std::int64_t tfunc_total = 0; - std::int64_t reset_tfunc_total = 0; + std::int64_t bg_total; + std::int64_t reset_bg_total; + std::int64_t tfunc_total; + std::int64_t reset_tfunc_total; if (num != std::size_t(-1)) { @@ -1335,8 +1323,8 @@ namespace hpx::threads::detail { std::int64_t scheduled_thread_pool::get_background_work_duration( std::size_t num, bool reset) { - std::int64_t bg_total = 0; - std::int64_t reset_bg_total = 0; + std::int64_t bg_total; + std::int64_t reset_bg_total; if (num != std::size_t(-1)) { @@ -1376,10 +1364,10 @@ namespace hpx::threads::detail { std::int64_t scheduled_thread_pool::get_background_send_overhead( std::size_t num, bool reset) { - std::int64_t bg_total = 0; - std::int64_t reset_bg_total = 0; - std::int64_t tfunc_total = 0; - std::int64_t reset_tfunc_total = 0; + std::int64_t bg_total; + std::int64_t reset_bg_total; + std::int64_t tfunc_total; + std::int64_t reset_tfunc_total; if (num != std::size_t(-1)) { @@ -1444,8 +1432,8 @@ namespace hpx::threads::detail { 
std::int64_t scheduled_thread_pool::get_background_send_duration( std::size_t num, bool reset) { - std::int64_t bg_total = 0; - std::int64_t reset_bg_total = 0; + std::int64_t bg_total; + std::int64_t reset_bg_total; if (num != std::size_t(-1)) { @@ -1486,10 +1474,10 @@ namespace hpx::threads::detail { scheduled_thread_pool::get_background_receive_overhead( std::size_t num, bool reset) { - std::int64_t bg_total = 0; - std::int64_t reset_bg_total = 0; - std::int64_t tfunc_total = 0; - std::int64_t reset_tfunc_total = 0; + std::int64_t bg_total; + std::int64_t reset_bg_total; + std::int64_t tfunc_total; + std::int64_t reset_tfunc_total; if (num != std::size_t(-1)) { @@ -1559,8 +1547,8 @@ namespace hpx::threads::detail { scheduled_thread_pool::get_background_receive_duration( std::size_t num, bool reset) { - std::int64_t bg_total = 0; - std::int64_t reset_bg_total = 0; + std::int64_t bg_total; + std::int64_t reset_bg_total; if (num != std::size_t(-1)) { @@ -1604,8 +1592,8 @@ namespace hpx::threads::detail { std::int64_t scheduled_thread_pool::get_cumulative_duration( std::size_t num, bool reset) { - std::int64_t tfunc_total = 0; - std::int64_t reset_tfunc_total = 0; + std::int64_t tfunc_total; + std::int64_t reset_tfunc_total; if (num != static_cast(-1)) { @@ -1638,7 +1626,7 @@ namespace hpx::threads::detail { tfunc_total -= reset_tfunc_total; return static_cast( - double(tfunc_total) * timestamp_scale_); + static_cast(tfunc_total) * timestamp_scale_); } #if defined(HPX_HAVE_THREAD_IDLE_RATES) @@ -1749,11 +1737,11 @@ namespace hpx::threads::detail { counter_data_.end(), static_cast(0), &scheduling_counter_data::tfunc_times_); - std::int64_t reset_exec_total = + std::int64_t const reset_exec_total = accumulate_projected(counter_data_.begin(), counter_data_.end(), static_cast(0), &scheduling_counter_data::reset_idle_rate_time_); - std::int64_t reset_tfunc_total = + std::int64_t const reset_tfunc_total = accumulate_projected(counter_data_.begin(), counter_data_.end(), 
static_cast(0), &scheduling_counter_data::reset_idle_rate_time_total_); @@ -1794,8 +1782,9 @@ namespace hpx::threads::detail { std::int64_t exec_time = counter_data_[num].exec_times_; std::int64_t tfunc_time = counter_data_[num].tfunc_times_; - std::int64_t reset_exec_time = counter_data_[num].reset_idle_rate_time_; - std::int64_t reset_tfunc_time = + std::int64_t const reset_exec_time = + counter_data_[num].reset_idle_rate_time_; + std::int64_t const reset_tfunc_time = counter_data_[num].reset_idle_rate_time_total_; if (reset) diff --git a/libs/core/thread_pools/include/hpx/thread_pools/scheduling_loop.hpp b/libs/core/thread_pools/include/hpx/thread_pools/scheduling_loop.hpp index 6a6dbb766d69..1ca53941ee4b 100644 --- a/libs/core/thread_pools/include/hpx/thread_pools/scheduling_loop.hpp +++ b/libs/core/thread_pools/include/hpx/thread_pools/scheduling_loop.hpp @@ -1,4 +1,4 @@ -// Copyright (c) 2007-2023 Hartmut Kaiser +// Copyright (c) 2007-2024 Hartmut Kaiser // // SPDX-License-Identifier: BSL-1.0 // Distributed under the Boost Software License, Version 1.0. 
(See accompanying @@ -9,8 +9,8 @@ #include #include #include +#include #include -#include #include #include #include @@ -20,6 +20,10 @@ #include #include +#if defined(HPX_HAVE_ITTNOTIFY) && HPX_HAVE_ITTNOTIFY != 0 && \ + !defined(HPX_HAVE_APEX) +#include +#endif #if defined(HPX_HAVE_APEX) #include #endif @@ -38,81 +42,40 @@ namespace hpx::threads::detail { { idle_collect_rate( std::int64_t& tfunc_time, std::int64_t& exec_time) noexcept - : start_timestamp_(util::hardware::timestamp()) + : start_timestamp_( + static_cast(util::hardware::timestamp())) , tfunc_time_(tfunc_time) , exec_time_(exec_time) { } - void collect_exec_time(std::int64_t timestamp) const noexcept + void collect_exec_time(std::uint64_t timestamp) const noexcept { - exec_time_ += util::hardware::timestamp() - timestamp; + exec_time_ += static_cast( + util::hardware::timestamp() - timestamp); } void take_snapshot() noexcept { if (tfunc_time_ == static_cast(-1)) { - start_timestamp_ = util::hardware::timestamp(); + start_timestamp_ = + static_cast(util::hardware::timestamp()); tfunc_time_ = 0; exec_time_ = 0; } else { - tfunc_time_ = util::hardware::timestamp() - start_timestamp_; + tfunc_time_ = + static_cast(util::hardware::timestamp()) - + start_timestamp_; } } std::int64_t start_timestamp_; - std::int64_t& tfunc_time_; std::int64_t& exec_time_; }; - - struct exec_time_wrapper - { - explicit exec_time_wrapper(idle_collect_rate& idle_rate) noexcept - : timestamp_(util::hardware::timestamp()) - , idle_rate_(idle_rate) - { - } - - exec_time_wrapper(exec_time_wrapper const&) = delete; - exec_time_wrapper(exec_time_wrapper&&) = delete; - - exec_time_wrapper& operator=(exec_time_wrapper const&) = delete; - exec_time_wrapper& operator=(exec_time_wrapper&&) = delete; - - ~exec_time_wrapper() - { - idle_rate_.collect_exec_time(timestamp_); - } - - std::int64_t timestamp_; - idle_collect_rate& idle_rate_; - }; - - struct tfunc_time_wrapper - { - explicit constexpr tfunc_time_wrapper( - idle_collect_rate& 
idle_rate) noexcept - : idle_rate_(idle_rate) - { - } - - tfunc_time_wrapper(tfunc_time_wrapper const&) = delete; - tfunc_time_wrapper(tfunc_time_wrapper&&) = delete; - - tfunc_time_wrapper& operator=(tfunc_time_wrapper const&) = delete; - tfunc_time_wrapper& operator=(tfunc_time_wrapper&&) = delete; - - ~tfunc_time_wrapper() - { - idle_rate_.take_snapshot(); - } - - idle_collect_rate& idle_rate_; - }; #else struct idle_collect_rate { @@ -121,48 +84,17 @@ namespace hpx::threads::detail { { } }; - - struct exec_time_wrapper - { - explicit constexpr exec_time_wrapper(idle_collect_rate&) noexcept {} - }; - - struct tfunc_time_wrapper - { - explicit constexpr tfunc_time_wrapper(idle_collect_rate&) noexcept {} - }; #endif /////////////////////////////////////////////////////////////////////////// - struct is_active_wrapper - { - explicit is_active_wrapper(bool& is_active) noexcept - : is_active_(is_active) - { - is_active = true; - } - - is_active_wrapper(is_active_wrapper const&) = delete; - is_active_wrapper(is_active_wrapper&&) = delete; - - is_active_wrapper& operator=(is_active_wrapper const&) = delete; - is_active_wrapper& operator=(is_active_wrapper&&) = delete; - - ~is_active_wrapper() - { - is_active_ = false; - } - - bool& is_active_; - }; - template void scheduling_loop(std::size_t num_thread, SchedulingPolicy& scheduler, scheduling_counters& counters, scheduling_callbacks& params) { std::atomic& this_state = scheduler.get_state(num_thread); -#if HPX_HAVE_ITTNOTIFY != 0 && !defined(HPX_HAVE_APEX) +#if defined(HPX_HAVE_ITTNOTIFY) && HPX_HAVE_ITTNOTIFY != 0 && \ + !defined(HPX_HAVE_APEX) util::itt::stack_context ctx; // helper for itt support util::itt::thread_domain const thread_domain; util::itt::id threadid(thread_domain, &scheduler); @@ -176,8 +108,11 @@ namespace hpx::threads::detail { background_work_exec_time bg_work_exec_time_init(counters); +#ifdef HPX_HAVE_THREAD_IDLE_RATES idle_collect_rate idle_rate(counters.tfunc_time_, counters.exec_time_); - 
[[maybe_unused]] tfunc_time_wrapper tfunc_time_collector(idle_rate); + auto tfunc_time_collector = hpx::experimental::scope_exit( + [&idle_rate] { idle_rate.take_snapshot(); }); +#endif // spin for some time after queues have become empty bool may_exit = false; @@ -267,15 +202,24 @@ namespace hpx::threads::detail { thrd_stat.get_previous(), thread_schedule_state::active); - [[maybe_unused]] tfunc_time_wrapper - tfunc_time_collector_inner(idle_rate); - +#ifdef HPX_HAVE_THREAD_IDLE_RATES + auto tfunc_time_collector_inner = + hpx::experimental::scope_exit([&idle_rate] { + idle_rate.take_snapshot(); + }); +#endif // thread returns new required state store the // returned state in the thread { - is_active_wrapper utilization( - counters.is_active_); -#if HPX_HAVE_ITTNOTIFY != 0 && !defined(HPX_HAVE_APEX) + counters.is_active_ = true; + auto utilization = + hpx::experimental::scope_exit( + [&is_active = counters.is_active_] { + is_active = false; + }); + +#if defined(HPX_HAVE_ITTNOTIFY) && HPX_HAVE_ITTNOTIFY != 0 && \ + !defined(HPX_HAVE_APEX) util::itt::caller_context cctx(ctx); // util::itt::undo_frame_context undoframe(fctx); util::itt::task task = @@ -285,11 +229,16 @@ namespace hpx::threads::detail { task.add_metadata( task_phase, thrdptr->get_thread_phase()); #endif +#ifdef HPX_HAVE_THREAD_IDLE_RATES // Record time elapsed in thread changing state // and add to aggregate execution time. 
- [[maybe_unused]] exec_time_wrapper - exec_time_collector(idle_rate); - + auto exec_time_collector = + hpx::experimental::scope_exit( + [&idle_rate, + ts = util::hardware::timestamp()] { + idle_rate.collect_exec_time(ts); + }); +#endif #if defined(HPX_HAVE_APEX) // get the APEX data pointer, in case we are // resuming the thread and have to restore any @@ -385,9 +334,10 @@ namespace hpx::threads::detail { else if (HPX_UNLIKELY(state_val == thread_schedule_state::pending_boost)) { - thrdptr->set_state(thread_schedule_state::pending); + [[maybe_unused]] auto oldstate = + thrdptr->set_state(thread_schedule_state::pending); - if (HPX_LIKELY(next_thrd == nullptr)) + if (HPX_LIKELY(!next_thrd)) { // reschedule this thread right away if the // background work will be triggered @@ -464,7 +414,7 @@ namespace hpx::threads::detail { { ++idle_loop_count; - next_thrd = nullptr; + next_thrd = thread_id_ref_type(); if (scheduler.wait_or_add_new(num_thread, running, idle_loop_count, enable_stealing_staged, added, &next_thrd)) @@ -503,7 +453,8 @@ namespace hpx::threads::detail { HPX_ASSERT(background_running); *background_running = false; //-V522 - // do background work in parcel layer and in agas + // do background work in parcel layer and in + // agas [[maybe_unused]] bool const has_exited = call_background_thread( background_thread, next_thrd, @@ -514,7 +465,7 @@ namespace hpx::threads::detail { // the background thread should have exited HPX_ASSERT(has_exited); - background_thread = thread_id_type(); + background_thread.reset(); background_running.reset(); } else @@ -625,7 +576,7 @@ namespace hpx::threads::detail { // the background thread should have exited HPX_ASSERT(has_exited); - background_thread = thread_id_type(); + background_thread.reset(); background_running.reset(); } else diff --git a/libs/core/thread_pools/src/detail/background_thread.cpp b/libs/core/thread_pools/src/detail/background_thread.cpp index 6906b05a9b65..4982df481111 100644 --- 
a/libs/core/thread_pools/src/detail/background_thread.cpp +++ b/libs/core/thread_pools/src/detail/background_thread.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2023 Hartmut Kaiser +// Copyright (c) 2023-2024 Hartmut Kaiser // // SPDX-License-Identifier: BSL-1.0 // Distributed under the Boost Software License, Version 1.0. (See accompanying @@ -74,8 +74,9 @@ namespace hpx::threads::detail { get_thread_id_data(background_thread)); // We can now set the state to pending - get_thread_id_data(background_thread) - ->set_state(thread_schedule_state::pending); + [[maybe_unused]] auto old_state = + get_thread_id_data(background_thread) + ->set_state(thread_schedule_state::pending); return background_thread; } @@ -110,7 +111,7 @@ namespace hpx::threads::detail { } } - constexpr bool is_valid() const noexcept + [[nodiscard]] constexpr bool is_valid() const noexcept { return need_restore_state_; } @@ -129,9 +130,9 @@ namespace hpx::threads::detail { } // Get the state this thread was in before execution (usually pending), - // this helps making sure no other worker-thread is started to execute + // this helps to make sure no other worker-thread is started to execute // this HPX-thread in the meantime. 
- thread_schedule_state get_previous() const noexcept + [[nodiscard]] thread_schedule_state get_previous() const noexcept { return prev_state_.state(); } @@ -156,7 +157,8 @@ namespace hpx::threads::detail { need_restore_state_ = false; } - constexpr thread_id_ref_type const& get_next_thread() const noexcept + [[nodiscard]] constexpr thread_id_ref_type const& get_next_thread() + const noexcept { return next_thread_id_; } @@ -219,10 +221,10 @@ namespace hpx::threads::detail { // invoke background thread thrd_stat = (*thrdptr)(context_storage); - thread_id_ref_type next = thrd_stat.move_next_thread(); - if (next != nullptr && next != background_thread) + if (thread_id_ref_type next = thrd_stat.move_next_thread(); + next && next != background_thread) { - if (next_thrd == nullptr) + if (!next_thrd) { next_thrd = HPX_MOVE(next); } @@ -243,7 +245,8 @@ namespace hpx::threads::detail { if (HPX_LIKELY(state_val == thread_schedule_state::pending_boost)) { - thrdptr->set_state(thread_schedule_state::pending); + [[maybe_unused]] auto old_state = + thrdptr->set_state(thread_schedule_state::pending); } else if (thread_schedule_state::terminated == state_val) { diff --git a/libs/core/thread_pools/src/scheduled_thread_pool.cpp b/libs/core/thread_pools/src/scheduled_thread_pool.cpp index 308d8748f586..8d02ecc8be51 100644 --- a/libs/core/thread_pools/src/scheduled_thread_pool.cpp +++ b/libs/core/thread_pools/src/scheduled_thread_pool.cpp @@ -17,64 +17,45 @@ /////////////////////////////////////////////////////////////////////////////// /// explicit template instantiation for the thread pools of our choice -template class HPX_CORE_EXPORT hpx::threads::policies::local_queue_scheduler<>; template class HPX_CORE_EXPORT hpx::threads::detail::scheduled_thread_pool< hpx::threads::policies::local_queue_scheduler<>>; -template class HPX_CORE_EXPORT hpx::threads::policies::static_queue_scheduler<>; template class HPX_CORE_EXPORT hpx::threads::detail::scheduled_thread_pool< 
hpx::threads::policies::static_queue_scheduler<>>; -template class HPX_CORE_EXPORT hpx::threads::policies::background_scheduler<>; template class HPX_CORE_EXPORT hpx::threads::detail::scheduled_thread_pool< hpx::threads::policies::background_scheduler<>>; -template class HPX_CORE_EXPORT - hpx::threads::policies::local_priority_queue_scheduler<>; template class HPX_CORE_EXPORT hpx::threads::detail::scheduled_thread_pool< hpx::threads::policies::local_priority_queue_scheduler>; -template class HPX_CORE_EXPORT - hpx::threads::policies::static_priority_queue_scheduler<>; template class HPX_CORE_EXPORT hpx::threads::detail::scheduled_thread_pool< hpx::threads::policies::static_priority_queue_scheduler<>>; #if defined(HPX_HAVE_CXX11_STD_ATOMIC_128BIT) -template class HPX_CORE_EXPORT - hpx::threads::policies::local_priority_queue_scheduler; template class HPX_CORE_EXPORT hpx::threads::detail::scheduled_thread_pool< hpx::threads::policies::local_priority_queue_scheduler>; #endif #if defined(HPX_HAVE_CXX11_STD_ATOMIC_128BIT) -template class HPX_CORE_EXPORT - hpx::threads::policies::local_priority_queue_scheduler; template class HPX_CORE_EXPORT hpx::threads::detail::scheduled_thread_pool< hpx::threads::policies::local_priority_queue_scheduler>; -template class HPX_CORE_EXPORT - hpx::threads::policies::local_priority_queue_scheduler; template class HPX_CORE_EXPORT hpx::threads::detail::scheduled_thread_pool< hpx::threads::policies::local_priority_queue_scheduler>; #endif -template class HPX_CORE_EXPORT - hpx::threads::policies::shared_priority_queue_scheduler<>; template class HPX_CORE_EXPORT hpx::threads::detail::scheduled_thread_pool< hpx::threads::policies::shared_priority_queue_scheduler<>>; -template class HPX_CORE_EXPORT - hpx::threads::policies::local_workrequesting_scheduler<>; template class HPX_CORE_EXPORT hpx::threads::detail::scheduled_thread_pool< hpx::threads::policies::local_workrequesting_scheduler<>>; -template class HPX_CORE_EXPORT - 
hpx::threads::policies::local_workrequesting_scheduler; template class HPX_CORE_EXPORT hpx::threads::detail::scheduled_thread_pool< hpx::threads::policies::local_workrequesting_scheduler>; + +template class HPX_CORE_EXPORT hpx::threads::detail::scheduled_thread_pool< + hpx::threads::policies::local_workrequesting_scheduler>; diff --git a/libs/core/threading_base/include/hpx/threading_base/thread_data.hpp b/libs/core/threading_base/include/hpx/threading_base/thread_data.hpp index f32d5ac84545..ff4e8348cb25 100644 --- a/libs/core/threading_base/include/hpx/threading_base/thread_data.hpp +++ b/libs/core/threading_base/include/hpx/threading_base/thread_data.hpp @@ -1,4 +1,4 @@ -// Copyright (c) 2007-2023 Hartmut Kaiser +// Copyright (c) 2007-2024 Hartmut Kaiser // Copyright (c) 2011 Bryce Lelbach // Copyright (c) 2008-2009 Chirag Dekate, Anshul Tandon // @@ -253,24 +253,25 @@ namespace hpx::threads { } #if !defined(HPX_HAVE_THREAD_DESCRIPTION) - threads::thread_description get_description() const + static constexpr threads::thread_description get_description() noexcept { - return threads::thread_description(""); + return {""}; } - threads::thread_description set_description( - threads::thread_description /*value*/) + static constexpr threads::thread_description set_description( + threads::thread_description /*value*/) noexcept { - return threads::thread_description(""); + return {""}; } - threads::thread_description get_lco_description() const //-V524 + static constexpr threads::thread_description + get_lco_description() noexcept //-V524 { - return threads::thread_description(""); + return {""}; } - threads::thread_description set_lco_description( //-V524 - threads::thread_description /*value*/) + static constexpr threads::thread_description set_lco_description( + threads::thread_description /*value*/) noexcept //-V524 { - return threads::thread_description(""); + return {""}; } #else threads::thread_description get_description() const @@ -306,20 +307,20 @@ namespace 
hpx::threads { #if !defined(HPX_HAVE_THREAD_PARENT_REFERENCE) /// Return the locality of the parent thread - constexpr std::uint32_t get_parent_locality_id() const noexcept + static constexpr std::uint32_t get_parent_locality_id() noexcept { // this is the same as naming::invalid_locality_id return ~static_cast(0); } /// Return the thread id of the parent thread - constexpr thread_id_type get_parent_thread_id() const noexcept + static constexpr thread_id_type get_parent_thread_id() noexcept { return threads::invalid_thread_id; } /// Return the phase of the parent thread - constexpr std::size_t get_parent_thread_phase() const noexcept + static constexpr std::size_t get_parent_thread_phase() noexcept { return 0; } diff --git a/libs/core/threading_base/include/hpx/threading_base/thread_description.hpp b/libs/core/threading_base/include/hpx/threading_base/thread_description.hpp index ebc9e1468ffc..64f7b5cec507 100644 --- a/libs/core/threading_base/include/hpx/threading_base/thread_description.hpp +++ b/libs/core/threading_base/include/hpx/threading_base/thread_description.hpp @@ -1,4 +1,4 @@ -// Copyright (c) 2016-2023 Hartmut Kaiser +// Copyright (c) 2016-2024 Hartmut Kaiser // // SPDX-License-Identifier: BSL-1.0 // Distributed under the Boost Software License, Version 1.0. 
(See accompanying @@ -254,12 +254,12 @@ namespace hpx::threads { { } - [[nodiscard]] constexpr data_type kind() const noexcept + [[nodiscard]] static constexpr data_type kind() noexcept { return data_type_description; } - [[nodiscard]] constexpr char const* get_description() const noexcept + [[nodiscard]] static constexpr char const* get_description() noexcept { return ""; } diff --git a/libs/core/threading_base/include/hpx/threading_base/thread_pool_base.hpp b/libs/core/threading_base/include/hpx/threading_base/thread_pool_base.hpp index 89d6789c33d9..8ae8b0f5d3f2 100644 --- a/libs/core/threading_base/include/hpx/threading_base/thread_pool_base.hpp +++ b/libs/core/threading_base/include/hpx/threading_base/thread_pool_base.hpp @@ -1,5 +1,5 @@ // Copyright (c) 2018 Mikael Simberg -// Copyright (c) 2007-2023 Hartmut Kaiser +// Copyright (c) 2007-2024 Hartmut Kaiser // // SPDX-License-Identifier: BSL-1.0 // Distributed under the Boost Software License, Version 1.0. (See accompanying @@ -418,39 +418,17 @@ namespace hpx::threads { } std::int64_t get_thread_count_unknown( - std::size_t num_thread, bool reset) - { - return get_thread_count(thread_schedule_state::unknown, - thread_priority::default_, num_thread, reset); - } - std::int64_t get_thread_count_active(std::size_t num_thread, bool reset) - { - return get_thread_count(thread_schedule_state::active, - thread_priority::default_, num_thread, reset); - } + std::size_t num_thread, bool reset); + std::int64_t get_thread_count_active( + std::size_t num_thread, bool reset); std::int64_t get_thread_count_pending( - std::size_t num_thread, bool reset) - { - return get_thread_count(thread_schedule_state::pending, - thread_priority::default_, num_thread, reset); - } + std::size_t num_thread, bool reset); std::int64_t get_thread_count_suspended( - std::size_t num_thread, bool reset) - { - return get_thread_count(thread_schedule_state::suspended, - thread_priority::default_, num_thread, reset); - } + std::size_t num_thread, 
bool reset); std::int64_t get_thread_count_terminated( - std::size_t num_thread, bool reset) - { - return get_thread_count(thread_schedule_state::terminated, - thread_priority::default_, num_thread, reset); - } - std::int64_t get_thread_count_staged(std::size_t num_thread, bool reset) - { - return get_thread_count(thread_schedule_state::staged, - thread_priority::default_, num_thread, reset); - } + std::size_t num_thread, bool reset); + std::int64_t get_thread_count_staged( + std::size_t num_thread, bool reset); virtual std::int64_t get_scheduler_utilization() const = 0; diff --git a/libs/core/threading_base/src/thread_data.cpp b/libs/core/threading_base/src/thread_data.cpp index a884fbc33d52..228c8de17b2a 100644 --- a/libs/core/threading_base/src/thread_data.cpp +++ b/libs/core/threading_base/src/thread_data.cpp @@ -28,7 +28,10 @@ namespace hpx::threads { namespace detail { - static get_locality_id_type* get_locality_id_f; + namespace { + + get_locality_id_type* get_locality_id_f = nullptr; + } void set_get_locality_id(get_locality_id_type* f) { diff --git a/libs/core/threading_base/src/thread_helpers.cpp b/libs/core/threading_base/src/thread_helpers.cpp index ecbd95b0c87d..810bf734686e 100644 --- a/libs/core/threading_base/src/thread_helpers.cpp +++ b/libs/core/threading_base/src/thread_helpers.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2007-2022 Hartmut Kaiser +// Copyright (c) 2007-2024 Hartmut Kaiser // Copyright (c) 2011 Bryce Lelbach // // SPDX-License-Identifier: BSL-1.0 @@ -294,7 +294,10 @@ namespace hpx::threads { #endif //////////////////////////////////////////////////////////////////////////// - static thread_local std::size_t continuation_recursion_count(0); + namespace { + + thread_local std::size_t continuation_recursion_count(0); + } std::size_t& get_continuation_recursion_count() noexcept { @@ -423,9 +426,9 @@ namespace hpx::threads { namespace hpx::this_thread { - // The function \a suspend will return control to the thread manager - // (suspends the 
current thread). It sets the new state of this thread - // to the thread state passed as the parameter. + // The function 'suspend' will return control to the thread manager + // (suspends the current thread). It sets the new state of this thread to + // the thread state passed as the parameter. // // If the suspension was aborted, this function will throw a // \a yield_aborted exception. @@ -619,14 +622,14 @@ namespace hpx::this_thread { return false; #if defined(HPX_HAVE_THREADS_GET_STACK_POINTER) - std::ptrdiff_t remaining_stack = get_available_stack_space(); + std::ptrdiff_t const remaining_stack = get_available_stack_space(); if (remaining_stack < 0) { HPX_THROW_EXCEPTION(hpx::error::out_of_memory, "has_sufficient_stack_space", "Stack overflow"); } - bool sufficient_stack_space = - std::size_t(remaining_stack) >= space_needed; + bool const sufficient_stack_space = + static_cast(remaining_stack) >= space_needed; return sufficient_stack_space; #else diff --git a/libs/core/threading_base/src/thread_num_tss.cpp b/libs/core/threading_base/src/thread_num_tss.cpp index a4d9cf781b5a..d6687271ab32 100644 --- a/libs/core/threading_base/src/thread_num_tss.cpp +++ b/libs/core/threading_base/src/thread_num_tss.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2007-2023 Hartmut Kaiser +// Copyright (c) 2007-2024 Hartmut Kaiser // // SPDX-License-Identifier: BSL-1.0 // Distributed under the Boost Software License, Version 1.0. 
(See accompanying @@ -17,7 +17,7 @@ namespace hpx::threads::detail { thread_nums& thread_nums_tss() { - static thread_local thread_nums thread_nums_tss_ = { + thread_local thread_nums thread_nums_tss_ = { static_cast(-1), static_cast(-1), static_cast(-1)}; return thread_nums_tss_; diff --git a/libs/core/threading_base/src/thread_pool_base.cpp b/libs/core/threading_base/src/thread_pool_base.cpp index d6f1812c5595..152f8fc7c062 100644 --- a/libs/core/threading_base/src/thread_pool_base.cpp +++ b/libs/core/threading_base/src/thread_pool_base.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2007-2023 Hartmut Kaiser +// Copyright (c) 2007-2024 Hartmut Kaiser // // SPDX-License-Identifier: BSL-1.0 // Distributed under the Boost Software License, Version 1.0. (See accompanying @@ -87,6 +87,48 @@ namespace hpx::threads { return topo.cpuset_to_nodeset(used_processing_units); } + std::int64_t thread_pool_base::get_thread_count_unknown( + std::size_t num_thread, bool reset) + { + return get_thread_count(thread_schedule_state::unknown, + thread_priority::default_, num_thread, reset); + } + + std::int64_t thread_pool_base::get_thread_count_active( + std::size_t num_thread, bool reset) + { + return get_thread_count(thread_schedule_state::active, + thread_priority::default_, num_thread, reset); + } + + std::int64_t thread_pool_base::get_thread_count_pending( + std::size_t num_thread, bool reset) + { + return get_thread_count(thread_schedule_state::pending, + thread_priority::default_, num_thread, reset); + } + + std::int64_t thread_pool_base::get_thread_count_suspended( + std::size_t num_thread, bool reset) + { + return get_thread_count(thread_schedule_state::suspended, + thread_priority::default_, num_thread, reset); + } + + std::int64_t thread_pool_base::get_thread_count_terminated( + std::size_t num_thread, bool reset) + { + return get_thread_count(thread_schedule_state::terminated, + thread_priority::default_, num_thread, reset); + } + + std::int64_t 
thread_pool_base::get_thread_count_staged( + std::size_t num_thread, bool reset) + { + return get_thread_count(thread_schedule_state::staged, + thread_priority::default_, num_thread, reset); + } + std::size_t thread_pool_base::get_active_os_thread_count() const { std::size_t active_os_thread_count = 0; diff --git a/libs/core/threading_base/tests/regressions/thread_stacksize_current.cpp b/libs/core/threading_base/tests/regressions/thread_stacksize_current.cpp index eab76a473acc..28ef29897896 100644 --- a/libs/core/threading_base/tests/regressions/thread_stacksize_current.cpp +++ b/libs/core/threading_base/tests/regressions/thread_stacksize_current.cpp @@ -29,7 +29,7 @@ void test(hpx::threads::thread_stacksize stacksize) hpx::async(exec, [&exec_current, stacksize]() { // This thread should have the stack size stacksize; it has been // explicitly set in the executor. - hpx::threads::thread_stacksize self_stacksize = + hpx::threads::thread_stacksize const self_stacksize = hpx::threads::get_self_stacksize_enum(); HPX_TEST_EQ(self_stacksize, stacksize); HPX_TEST_NEQ(self_stacksize, hpx::threads::thread_stacksize::current); @@ -37,7 +37,7 @@ void test(hpx::threads::thread_stacksize stacksize) hpx::async(exec_current, [stacksize]() { // This thread should also have the stack size stacksize; it has // been inherited size from the parent thread. 
- hpx::threads::thread_stacksize self_stacksize = + hpx::threads::thread_stacksize const self_stacksize = hpx::threads::get_self_stacksize_enum(); HPX_TEST_EQ(self_stacksize, stacksize); HPX_TEST_NEQ( @@ -63,7 +63,7 @@ int hpx_main() int main(int argc, char** argv) { // clang-format off - std::vector schedulers = { + std::vector const schedulers = { "local", "local-priority-fifo", #if defined(HPX_HAVE_CXX11_STD_ATOMIC_128BIT) @@ -77,7 +77,8 @@ int main(int argc, char** argv) #endif "shared-priority", "local-workrequesting-fifo", - "local-workrequesting-lifo" + "local-workrequesting-lifo", + "local-workrequesting-mc", }; // clang-format on for (auto const& scheduler : schedulers) diff --git a/libs/core/threadmanager/include/hpx/modules/threadmanager.hpp b/libs/core/threadmanager/include/hpx/modules/threadmanager.hpp index 6e5efe4e9693..985f364c9f6c 100644 --- a/libs/core/threadmanager/include/hpx/modules/threadmanager.hpp +++ b/libs/core/threadmanager/include/hpx/modules/threadmanager.hpp @@ -1,4 +1,4 @@ -// Copyright (c) 2007-2023 Hartmut Kaiser +// Copyright (c) 2007-2024 Hartmut Kaiser // Copyright (c) 2007-2009 Chirag Dekate, Anshul Tandon // Copyright (c) 2011 Bryce Lelbach, Katelyn Kufahl // Copyright (c) 2017 Shoshana Jakobovits @@ -9,8 +9,6 @@ #pragma once #include -#include -#include #include #include #include @@ -25,23 +23,21 @@ #include #include -#include #include #include #include #include #include #include -#include #include #include -#include #include #include #include -namespace hpx { namespace threads { +namespace hpx::threads { + /////////////////////////////////////////////////////////////////////////// /// The \a thread-manager class is the central instance of management for /// all (non-depleted) threads @@ -50,35 +46,38 @@ namespace hpx { namespace threads { private: // we use a simple mutex to protect the data members of the // thread manager for now - typedef std::mutex mutex_type; + using mutex_type = std::mutex; public: - typedef 
threads::policies::callback_notifier notification_policy_type; - typedef std::unique_ptr pool_type; - typedef threads::policies::scheduler_base scheduler_type; - typedef std::vector pool_vector; + using notification_policy_type = threads::policies::callback_notifier; + using pool_type = std::unique_ptr; + using pool_vector = std::vector; threadmanager(hpx::util::runtime_configuration& rtcfg_, #ifdef HPX_HAVE_TIMER_POOL util::io_service_pool& timer_pool, #endif notification_policy_type& notifier, - detail::network_background_callback_type + detail::network_background_callback_type const& network_background_callback = detail::network_background_callback_type()); + + threadmanager(threadmanager const&) = delete; + threadmanager(threadmanager&&) = delete; + threadmanager& operator=(threadmanager const&) = delete; + threadmanager& operator=(threadmanager&&) = delete; + ~threadmanager(); - void init(); + void init() const; void create_pools(); //! FIXME move to private and add --hpx:printpools cmd line option - void print_pools(std::ostream&); + void print_pools(std::ostream&) const; // Get functions thread_pool_base& default_pool() const; - scheduler_type& default_scheduler() const; - thread_pool_base& get_pool(std::string const& pool_name) const; thread_pool_base& get_pool(pool_id_type const& pool_id) const; thread_pool_base& get_pool(std::size_t thread_index) const; @@ -93,35 +92,31 @@ namespace hpx { namespace threads { /// created when the number of existing threads drops below the number /// of threads specified by the constructors max_count parameter. /// - /// \param func [in] The function or function object to execute as - /// the thread's function. This must have a signature as - /// defined by \a thread_function_type. - /// \param description [in] The value of this parameter allows to + /// \param data [in] The value of this parameter allows to /// specify a description of the thread to create. 
This /// information is used for logging purposes mainly, but /// might be useful for debugging as well. This parameter /// is optional and defaults to an empty string. + /// \param ec thread_id_ref_type register_work( - thread_init_data& data, error_code& ec = throws); + thread_init_data& data, error_code& ec = throws) const; /// The function \a register_thread adds a new work item to the thread /// manager. It creates a new \a thread, adds it to the internal /// management data structures, and schedules the new thread, if /// appropriate. /// - /// \param func [in] The function or function object to execute as - /// the thread's function. This must have a signature as - /// defined by \a thread_function_type. - /// \param id [out] This parameter will hold the id of the created - /// thread. This id is guaranteed to be validly - /// initialized before the thread function is executed. - /// \param description [in] The value of this parameter allows to + /// \param data [in] The value of this parameter allows to /// specify a description of the thread to create. This /// information is used for logging purposes mainly, but /// might be useful for debugging as well. This parameter /// is optional and defaults to an empty string. + /// \param id [out] This parameter will hold the id of the created + /// thread. This id is guaranteed to be validly + /// initialized before the thread function is executed. + /// \param ec void register_thread(thread_init_data& data, thread_id_ref_type& id, - error_code& ec = throws); + error_code& ec = throws) const; /// \brief Run the thread manager's work queue. This function /// instantiates the specified number of OS threads in each @@ -131,53 +126,42 @@ namespace hpx { namespace threads { /// \returns The function returns \a true if the thread manager /// has been started successfully, otherwise it returns /// \a false. 
- bool run(); + bool run() const; /// \brief Forcefully stop the thread-manager /// /// \param blocking /// - void stop(bool blocking = true); + void stop(bool blocking = true) const; - bool is_busy(); - bool is_idle(); + bool is_busy() const; + bool is_idle() const; - void wait(); - bool wait_for(hpx::chrono::steady_duration const& rel_time); + void wait() const; + bool wait_for(hpx::chrono::steady_duration const& rel_time) const; // \brief Suspend all thread pools. - void suspend(); + void suspend() const; // \brief Resume all thread pools. - void resume(); - - /// \brief Return whether the thread manager is still running - //! This returns the "minimal state", i.e. the state of the - //! least advanced thread pool - state status() const - { - hpx::state result(hpx::state::last_valid_runtime_state); + void resume() const; - for (auto& pool_iter : pools_) - { - hpx::state s = pool_iter->get_state(); - result = (std::min)(result, s); - } - - return result; - } + /// Return whether the thread manager is still running This returns the + /// "minimal state", i.e. the state of the least advanced thread pool + hpx::state status() const; /// \brief return the number of HPX-threads with the given state /// - /// \note This function lock the internal OS lock in the thread manager + /// \note This function locks the internal OS lock in the thread manager std::int64_t get_thread_count( thread_schedule_state state = thread_schedule_state::unknown, thread_priority priority = thread_priority::default_, - std::size_t num_thread = std::size_t(-1), bool reset = false); + std::size_t num_thread = static_cast(-1), + bool reset = false) const; - std::int64_t get_idle_core_count(); + std::int64_t get_idle_core_count() const; - mask_type get_idle_core_mask(); + mask_type get_idle_core_mask() const; std::int64_t get_background_thread_count() const; @@ -188,36 +172,21 @@ namespace hpx { namespace threads { // \brief Abort all threads which are in suspended state. 
This will set // the state of all suspended threads to \a pending while // supplying the wait_abort extended state flag - void abort_all_suspended_threads(); + void abort_all_suspended_threads() const; // \brief Clean up terminated threads. This deletes all threads which // have been terminated but which are still held in the queue // of terminated threads. Some schedulers might not do anything // here. - bool cleanup_terminated(bool delete_all); + bool cleanup_terminated(bool delete_all) const; /// \brief Return the number of OS threads running in this thread-manager /// /// This function will return correct results only if the thread-manager /// is running. - std::size_t get_os_thread_count() const - { - std::lock_guard lk(mtx_); - std::size_t total = 0; - for (auto& pool_iter : pools_) - { - total += pool_iter->get_os_thread_count(); - } - return total; - } - - std::thread& get_os_thread_handle(std::size_t num_thread) const - { - std::lock_guard lk(mtx_); - pool_id_type id = threads_lookup_[num_thread]; - thread_pool_base& pool = get_pool(id); - return pool.get_os_thread_handle(num_thread); - } + std::size_t get_os_thread_count() const; + + std::thread& get_os_thread_handle(std::size_t num_thread) const; public: /// API functions forwarding to notification policy @@ -227,174 +196,87 @@ namespace hpx { namespace threads { /// scheduler (which will result in it being passed to the runtime /// object, which in turn will report it to the console, etc.). void report_error( - std::size_t num_thread, std::exception_ptr const& e) const - { - // propagate the error reporting to all pools, which in turn - // will propagate to schedulers - for (auto& pool_iter : pools_) - { - pool_iter->report_error(num_thread, e); - } - } + std::size_t num_thread, std::exception_ptr const& e) const; public: /// Returns the mask identifying all processing units used by this /// thread manager. 
- mask_type get_used_processing_units() const - { - mask_type total_used_processing_punits = mask_type(); - threads::resize(total_used_processing_punits, - static_cast(hardware_concurrency())); - - for (auto& pool_iter : pools_) - { - total_used_processing_punits |= - pool_iter->get_used_processing_units(); - } - - return total_used_processing_punits; - } + mask_type get_used_processing_units() const; hwloc_bitmap_ptr get_pool_numa_bitmap( - std::string const& pool_name) const - { - return get_pool(pool_name).get_numa_domain_bitmap(); - } - - void set_scheduler_mode(threads::policies::scheduler_mode mode) noexcept - { - for (auto& pool_iter : pools_) - { - pool_iter->get_scheduler()->set_scheduler_mode(mode); - } - } - - void add_scheduler_mode(threads::policies::scheduler_mode mode) noexcept - { - for (auto& pool_iter : pools_) - { - pool_iter->get_scheduler()->add_scheduler_mode(mode); - } - } + std::string const& pool_name) const; + void set_scheduler_mode( + threads::policies::scheduler_mode mode) const noexcept; + void add_scheduler_mode( + threads::policies::scheduler_mode mode) const noexcept; void add_remove_scheduler_mode( threads::policies::scheduler_mode to_add_mode, - threads::policies::scheduler_mode to_remove_mode) noexcept - { - for (auto& pool_iter : pools_) - { - pool_iter->get_scheduler()->add_remove_scheduler_mode( - to_add_mode, to_remove_mode); - } - } - + threads::policies::scheduler_mode to_remove_mode) const noexcept; void remove_scheduler_mode( - threads::policies::scheduler_mode mode) noexcept - { - for (auto& pool_iter : pools_) - { - pool_iter->get_scheduler()->remove_scheduler_mode(mode); - } - } - - void reset_thread_distribution() noexcept - { - for (auto& pool_iter : pools_) - { - pool_iter->reset_thread_distribution(); - } - } - - void init_tss(std::size_t global_thread_num) - { - detail::set_global_thread_num_tss(global_thread_num); - } - - void deinit_tss() - { - detail::set_global_thread_num_tss(std::size_t(-1)); - } + 
threads::policies::scheduler_mode mode) const noexcept; + + void reset_thread_distribution() const noexcept; + + static void init_tss(std::size_t global_thread_num); + static void deinit_tss(); public: // performance counters - std::int64_t get_queue_length(bool reset); + std::int64_t get_queue_length(bool reset) const; #ifdef HPX_HAVE_THREAD_QUEUE_WAITTIME - std::int64_t get_average_thread_wait_time(bool reset); - std::int64_t get_average_task_wait_time(bool reset); + std::int64_t get_average_thread_wait_time(bool reset) const; + std::int64_t get_average_task_wait_time(bool reset) const; #endif #if defined(HPX_HAVE_BACKGROUND_THREAD_COUNTERS) && \ defined(HPX_HAVE_THREAD_IDLE_RATES) - std::int64_t get_background_work_duration(bool reset); - std::int64_t get_background_overhead(bool reset); + std::int64_t get_background_work_duration(bool reset) const; + std::int64_t get_background_overhead(bool reset) const; - std::int64_t get_background_send_duration(bool reset); - std::int64_t get_background_send_overhead(bool reset); + std::int64_t get_background_send_duration(bool reset) const; + std::int64_t get_background_send_overhead(bool reset) const; - std::int64_t get_background_receive_duration(bool reset); - std::int64_t get_background_receive_overhead(bool reset); + std::int64_t get_background_receive_duration(bool reset) const; + std::int64_t get_background_receive_overhead(bool reset) const; #endif //HPX_HAVE_BACKGROUND_THREAD_COUNTERS - std::int64_t get_cumulative_duration(bool reset); - - std::int64_t get_thread_count_unknown(bool reset) - { - return get_thread_count(thread_schedule_state::unknown, - thread_priority::default_, std::size_t(-1), reset); - } - std::int64_t get_thread_count_active(bool reset) - { - return get_thread_count(thread_schedule_state::active, - thread_priority::default_, std::size_t(-1), reset); - } - std::int64_t get_thread_count_pending(bool reset) - { - return get_thread_count(thread_schedule_state::pending, - thread_priority::default_, 
std::size_t(-1), reset); - } - std::int64_t get_thread_count_suspended(bool reset) - { - return get_thread_count(thread_schedule_state::suspended, - thread_priority::default_, std::size_t(-1), reset); - } - std::int64_t get_thread_count_terminated(bool reset) - { - return get_thread_count(thread_schedule_state::terminated, - thread_priority::default_, std::size_t(-1), reset); - } - std::int64_t get_thread_count_staged(bool reset) - { - return get_thread_count(thread_schedule_state::staged, - thread_priority::default_, std::size_t(-1), reset); - } + std::int64_t get_cumulative_duration(bool reset) const; + + std::int64_t get_thread_count_unknown(bool reset) const; + std::int64_t get_thread_count_active(bool reset) const; + std::int64_t get_thread_count_pending(bool reset) const; + std::int64_t get_thread_count_suspended(bool reset) const; + std::int64_t get_thread_count_terminated(bool reset) const; + std::int64_t get_thread_count_staged(bool reset) const; #ifdef HPX_HAVE_THREAD_IDLE_RATES - std::int64_t avg_idle_rate(bool reset) noexcept; + std::int64_t avg_idle_rate(bool reset) const noexcept; #ifdef HPX_HAVE_THREAD_CREATION_AND_CLEANUP_RATES - std::int64_t avg_creation_idle_rate(bool reset) noexcept; - std::int64_t avg_cleanup_idle_rate(bool reset) noexcept; + std::int64_t avg_creation_idle_rate(bool reset) const noexcept; + std::int64_t avg_cleanup_idle_rate(bool reset) const noexcept; #endif #endif #ifdef HPX_HAVE_THREAD_CUMULATIVE_COUNTS - std::int64_t get_executed_threads(bool reset) noexcept; - std::int64_t get_executed_thread_phases(bool reset) noexcept; + std::int64_t get_executed_threads(bool reset) const noexcept; + std::int64_t get_executed_thread_phases(bool reset) const noexcept; #ifdef HPX_HAVE_THREAD_IDLE_RATES - std::int64_t get_thread_duration(bool reset); - std::int64_t get_thread_phase_duration(bool reset); - std::int64_t get_thread_overhead(bool reset); - std::int64_t get_thread_phase_overhead(bool reset); - std::int64_t 
get_cumulative_thread_duration(bool reset); - std::int64_t get_cumulative_thread_overhead(bool reset); + std::int64_t get_thread_duration(bool reset) const; + std::int64_t get_thread_phase_duration(bool reset) const; + std::int64_t get_thread_overhead(bool reset) const; + std::int64_t get_thread_phase_overhead(bool reset) const; + std::int64_t get_cumulative_thread_duration(bool reset) const; + std::int64_t get_cumulative_thread_overhead(bool reset) const; #endif #endif #ifdef HPX_HAVE_THREAD_STEALING_COUNTS - std::int64_t get_num_pending_misses(bool reset); - std::int64_t get_num_pending_accesses(bool reset); - std::int64_t get_num_stolen_from_pending(bool reset); - std::int64_t get_num_stolen_from_staged(bool reset); - std::int64_t get_num_stolen_to_pending(bool reset); - std::int64_t get_num_stolen_to_staged(bool reset); + std::int64_t get_num_pending_misses(bool reset) const; + std::int64_t get_num_pending_accesses(bool reset) const; + std::int64_t get_num_stolen_from_pending(bool reset) const; + std::int64_t get_num_stolen_from_staged(bool reset) const; + std::int64_t get_num_stolen_to_pending(bool reset) const; + std::int64_t get_num_stolen_to_staged(bool reset) const; #endif private: @@ -431,6 +313,9 @@ namespace hpx { namespace threads { void create_scheduler_local_workrequesting_lifo( thread_pool_init_parameters const&, policies::thread_queue_init_parameters const&, std::size_t); + void create_scheduler_local_workrequesting_mc( + thread_pool_init_parameters const&, + policies::thread_queue_init_parameters const&, std::size_t); mutable mutex_type mtx_; // mutex protecting the members @@ -445,6 +330,6 @@ namespace hpx { namespace threads { notification_policy_type& notifier_; detail::network_background_callback_type network_background_callback_; }; -}} // namespace hpx::threads +} // namespace hpx::threads #include diff --git a/libs/core/threadmanager/include/hpx/threadmanager/threadmanager_fwd.hpp 
b/libs/core/threadmanager/include/hpx/threadmanager/threadmanager_fwd.hpp index b02bf0193319..ecac5dd2c9fa 100644 --- a/libs/core/threadmanager/include/hpx/threadmanager/threadmanager_fwd.hpp +++ b/libs/core/threadmanager/include/hpx/threadmanager/threadmanager_fwd.hpp @@ -1,4 +1,4 @@ -// Copyright (c) 2020 Hartmut Kaiser +// Copyright (c) 2020-2024 Hartmut Kaiser // // SPDX-License-Identifier: BSL-1.0 // Distributed under the Boost Software License, Version 1.0. (See accompanying @@ -8,10 +8,10 @@ #include -namespace hpx { namespace threads { +namespace hpx::threads { // The thread-manager class is the central instance of management for // all (non-depleted) threads class HPX_CORE_EXPORT threadmanager; -}} // namespace hpx::threads +} // namespace hpx::threads diff --git a/libs/core/threadmanager/src/threadmanager.cpp b/libs/core/threadmanager/src/threadmanager.cpp index 1fde4443d865..c0ac17d12c4c 100644 --- a/libs/core/threadmanager/src/threadmanager.cpp +++ b/libs/core/threadmanager/src/threadmanager.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2007-2023 Hartmut Kaiser +// Copyright (c) 2007-2024 Hartmut Kaiser // Copyright (c) 2011 Bryce Lelbach, Katelyn Kufahl // Copyright (c) 2008-2009 Chirag Dekate, Anshul Tandon // Copyright (c) 2015 Patricia Grubel @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include @@ -23,7 +22,6 @@ #include #include #include -#include #include #include #include @@ -35,6 +33,7 @@ #include #include +#include #include #include #include @@ -44,7 +43,7 @@ #include #include -namespace hpx { namespace threads { +namespace hpx::threads { namespace detail { void check_num_high_priority_queues( @@ -67,7 +66,8 @@ namespace hpx { namespace threads { util::io_service_pool& timer_pool, #endif notification_policy_type& notifier, - detail::network_background_callback_type network_background_callback) + detail::network_background_callback_type const& + network_background_callback) : rtcfg_(rtcfg) #ifdef HPX_HAVE_TIMER_POOL , 
timer_pool_(timer_pool) @@ -80,9 +80,9 @@ namespace hpx { namespace threads { // Add callbacks local to threadmanager. notifier.add_on_start_thread_callback( - hpx::bind(&threadmanager::init_tss, this, _1)); + hpx::bind(&threadmanager::init_tss, _1)); notifier.add_on_stop_thread_callback( - hpx::bind(&threadmanager::deinit_tss, this)); + hpx::bind(&threadmanager::deinit_tss)); auto& rp = hpx::resource::get_partitioner(); notifier.add_on_start_thread_callback(hpx::bind( @@ -133,13 +133,13 @@ namespace hpx { namespace threads { double const max_idle_backoff_time = hpx::util::get_entry_as( rtcfg_, "hpx.max_idle_backoff_time", HPX_IDLE_BACKOFF_TIME_MAX); - std::ptrdiff_t small_stacksize = + std::ptrdiff_t const small_stacksize = rtcfg_.get_stack_size(thread_stacksize::small_); - std::ptrdiff_t medium_stacksize = + std::ptrdiff_t const medium_stacksize = rtcfg_.get_stack_size(thread_stacksize::medium); - std::ptrdiff_t large_stacksize = + std::ptrdiff_t const large_stacksize = rtcfg_.get_stack_size(thread_stacksize::large); - std::ptrdiff_t huge_stacksize = + std::ptrdiff_t const huge_stacksize = rtcfg_.get_stack_size(thread_stacksize::huge); return policies::thread_queue_init_parameters(max_thread_count, @@ -173,8 +173,7 @@ namespace hpx { namespace threads { thread_pool_init.num_threads_, thread_pool_init.affinity_data_, thread_queue_init, "core-local_queue_scheduler"); - std::unique_ptr sched = - std::make_unique(init); + auto sched = std::make_unique(init); // set the default scheduler flags sched->set_scheduler_mode(thread_pool_init.mode_); @@ -197,7 +196,7 @@ namespace hpx { namespace threads { { // set parameters for scheduler and pool instantiation and perform // compatibility checks - std::size_t num_high_priority_queues = + std::size_t const num_high_priority_queues = hpx::util::get_entry_as(rtcfg_, "hpx.thread_queue.high_priority_queues", thread_pool_init.num_threads_); @@ -213,10 +212,9 @@ namespace hpx { namespace threads { 
local_sched_type::init_parameter_type init( thread_pool_init.num_threads_, thread_pool_init.affinity_data_, num_high_priority_queues, thread_queue_init, - "core-local_priority_queue_scheduler"); + "core-local_priority_queue_scheduler-fifo"); - std::unique_ptr sched = - std::make_unique(init); + auto sched = std::make_unique(init); // set the default scheduler flags sched->set_scheduler_mode(thread_pool_init.mode_); @@ -241,7 +239,7 @@ namespace hpx { namespace threads { #if defined(HPX_HAVE_CXX11_STD_ATOMIC_128BIT) // set parameters for scheduler and pool instantiation and perform // compatibility checks - std::size_t num_high_priority_queues = + std::size_t const num_high_priority_queues = hpx::util::get_entry_as(rtcfg_, "hpx.thread_queue.high_priority_queues", thread_pool_init.num_threads_); @@ -256,10 +254,9 @@ namespace hpx { namespace threads { local_sched_type::init_parameter_type init( thread_pool_init.num_threads_, thread_pool_init.affinity_data_, num_high_priority_queues, thread_queue_init, - "core-local_priority_queue_scheduler"); + "core-local_priority_queue_scheduler-lifo"); - std::unique_ptr sched = - std::make_unique(init); + auto sched = std::make_unique(init); // set the default scheduler flags sched->set_scheduler_mode(thread_pool_init.mode_); @@ -298,8 +295,7 @@ namespace hpx { namespace threads { using local_sched_type = hpx::threads::policies::background_scheduler<>; - std::unique_ptr sched = - std::make_unique(init); + auto sched = std::make_unique(init); // set the default scheduler flags sched->set_scheduler_mode(thread_pool_init.mode_); @@ -314,8 +310,7 @@ namespace hpx { namespace threads { using local_sched_type = hpx::threads::policies::static_queue_scheduler<>; - std::unique_ptr sched = - std::make_unique(init); + auto sched = std::make_unique(init); // set the default scheduler flags sched->set_scheduler_mode(thread_pool_init.mode_); @@ -341,7 +336,7 @@ namespace hpx { namespace threads { { // set parameters for scheduler and pool 
instantiation and perform // compatibility checks - std::size_t num_high_priority_queues = + std::size_t const num_high_priority_queues = hpx::util::get_entry_as(rtcfg_, "hpx.thread_queue.high_priority_queues", thread_pool_init.num_threads_); @@ -357,8 +352,7 @@ namespace hpx { namespace threads { num_high_priority_queues, thread_queue_init, "core-static_priority_queue_scheduler"); - std::unique_ptr sched = - std::make_unique(init); + auto sched = std::make_unique(init); // set the default scheduler flags sched->set_scheduler_mode(thread_pool_init.mode_); @@ -383,7 +377,7 @@ namespace hpx { namespace threads { #if defined(HPX_HAVE_CXX11_STD_ATOMIC_128BIT) // set parameters for scheduler and pool instantiation and perform // compatibility checks - std::size_t num_high_priority_queues = + std::size_t const num_high_priority_queues = hpx::util::get_entry_as(rtcfg_, "hpx.thread_queue.high_priority_queues", thread_pool_init.num_threads_); @@ -398,10 +392,9 @@ namespace hpx { namespace threads { local_sched_type::init_parameter_type init( thread_pool_init.num_threads_, thread_pool_init.affinity_data_, num_high_priority_queues, thread_queue_init, - "core-abp_fifo_priority_queue_scheduler"); + "core-abp_priority_queue_scheduler-fifo"); - std::unique_ptr sched = - std::make_unique(init); + auto sched = std::make_unique(init); // set the default scheduler flags sched->set_scheduler_mode(thread_pool_init.mode_); @@ -432,7 +425,7 @@ namespace hpx { namespace threads { #if defined(HPX_HAVE_CXX11_STD_ATOMIC_128BIT) // set parameters for scheduler and pool instantiation and perform // compatibility checks - std::size_t num_high_priority_queues = + std::size_t const num_high_priority_queues = hpx::util::get_entry_as(rtcfg_, "hpx.thread_queue.high_priority_queues", thread_pool_init.num_threads_); @@ -447,10 +440,9 @@ namespace hpx { namespace threads { local_sched_type::init_parameter_type init( thread_pool_init.num_threads_, thread_pool_init.affinity_data_, 
num_high_priority_queues, thread_queue_init, - "core-abp_fifo_priority_queue_scheduler"); + "core-abp_priority_queue_scheduler-lifo"); - std::unique_ptr sched = - std::make_unique(init); + auto sched = std::make_unique(init); // set the default scheduler flags sched->set_scheduler_mode(thread_pool_init.mode_); @@ -478,15 +470,14 @@ namespace hpx { namespace threads { std::size_t numa_sensitive) { // instantiate the scheduler - typedef hpx::threads::policies::shared_priority_queue_scheduler<> - local_sched_type; + using local_sched_type = + hpx::threads::policies::shared_priority_queue_scheduler<>; local_sched_type::init_parameter_type init( thread_pool_init.num_threads_, {1, 1, 1}, thread_pool_init.affinity_data_, thread_queue_init, "core-shared_priority_queue_scheduler"); - std::unique_ptr sched = - std::make_unique(init); + auto sched = std::make_unique(init); // set the default scheduler flags sched->set_scheduler_mode(thread_pool_init.mode_); @@ -507,9 +498,9 @@ namespace hpx { namespace threads { policies::thread_queue_init_parameters const& thread_queue_init, std::size_t numa_sensitive) { - // set parameters for scheduler and pool instantiation and - // perform compatibility checks - std::size_t num_high_priority_queues = + // set parameters for scheduler and pool instantiation and perform + // compatibility checks + std::size_t const num_high_priority_queues = hpx::util::get_entry_as(rtcfg_, "hpx.thread_queue.high_priority_queues", thread_pool_init.num_threads_); @@ -520,12 +511,52 @@ namespace hpx { namespace threads { using local_sched_type = hpx::threads::policies::local_workrequesting_scheduler<>; - local_sched_type::init_parameter_type init( + local_sched_type::init_parameter_type const init( thread_pool_init.num_threads_, thread_pool_init.affinity_data_, num_high_priority_queues, thread_queue_init, - "core-local_workrequesting_scheduler"); + "core-local_workrequesting_scheduler-fifo"); - std::unique_ptr sched(new local_sched_type(init)); + auto sched = 
std::make_unique(init); + + // set the default scheduler flags + sched->set_scheduler_mode(thread_pool_init.mode_); + + // conditionally set/unset this flag + sched->update_scheduler_mode( + policies::scheduler_mode::enable_stealing_numa, !numa_sensitive); + + // instantiate the pool + std::unique_ptr pool = std::make_unique< + hpx::threads::detail::scheduled_thread_pool>( + HPX_MOVE(sched), thread_pool_init); + pools_.push_back(HPX_MOVE(pool)); + } + + void threadmanager::create_scheduler_local_workrequesting_mc( + thread_pool_init_parameters const& thread_pool_init, + policies::thread_queue_init_parameters const& thread_queue_init, + std::size_t numa_sensitive) + { + // set parameters for scheduler and pool instantiation and perform + // compatibility checks + std::size_t const num_high_priority_queues = + hpx::util::get_entry_as(rtcfg_, + "hpx.thread_queue.high_priority_queues", + thread_pool_init.num_threads_); + detail::check_num_high_priority_queues( + thread_pool_init.num_threads_, num_high_priority_queues); + + // instantiate the scheduler + using local_sched_type = + hpx::threads::policies::local_workrequesting_scheduler; + + local_sched_type::init_parameter_type const init( + thread_pool_init.num_threads_, thread_pool_init.affinity_data_, + num_high_priority_queues, thread_queue_init, + "core-local_workrequesting_scheduler-mc"); + + auto sched = std::make_unique(init); // set the default scheduler flags sched->set_scheduler_mode(thread_pool_init.mode_); @@ -548,7 +579,7 @@ namespace hpx { namespace threads { { // set parameters for scheduler and pool instantiation and // perform compatibility checks - std::size_t num_high_priority_queues = + std::size_t const num_high_priority_queues = hpx::util::get_entry_as(rtcfg_, "hpx.thread_queue.high_priority_queues", thread_pool_init.num_threads_); @@ -560,12 +591,12 @@ namespace hpx { namespace threads { hpx::threads::policies::local_workrequesting_scheduler; - local_sched_type::init_parameter_type init( + 
local_sched_type::init_parameter_type const init( thread_pool_init.num_threads_, thread_pool_init.affinity_data_, num_high_priority_queues, thread_queue_init, - "core-local_workrequesting_scheduler"); + "core-local_workrequesting_scheduler-lifo"); - std::unique_ptr sched(new local_sched_type(init)); + auto sched = std::make_unique(init); // set the default scheduler flags sched->set_scheduler_mode(thread_pool_init.mode_); @@ -584,7 +615,7 @@ namespace hpx { namespace threads { void threadmanager::create_pools() { auto& rp = hpx::resource::get_partitioner(); - size_t num_pools = rp.get_num_pools(); + size_t const num_pools = rp.get_num_pools(); std::size_t thread_offset = 0; std::size_t const max_idle_loop_count = hpx::util::get_entry_as( @@ -593,10 +624,10 @@ namespace hpx { namespace threads { hpx::util::get_entry_as( rtcfg_, "hpx.max_busy_loop_count", HPX_BUSY_LOOP_COUNT_MAX); - std::size_t numa_sensitive = hpx::util::get_entry_as( + std::size_t const numa_sensitive = hpx::util::get_entry_as( rtcfg_, "hpx.numa_sensitive", 0); - policies::thread_queue_init_parameters thread_queue_init = + policies::thread_queue_init_parameters const thread_queue_init = get_init_parameters(); std::size_t max_background_threads = @@ -613,9 +644,11 @@ namespace hpx { namespace threads { for (size_t i = 0; i != num_pools; i++) { std::string name = rp.get_pool_name(i); - resource::scheduling_policy sched_type = rp.which_scheduler(name); + resource::scheduling_policy const sched_type = + rp.which_scheduler(name); std::size_t num_threads_in_pool = rp.get_num_threads(i); - policies::scheduler_mode scheduler_mode = rp.get_scheduler_mode(i); + policies::scheduler_mode const scheduler_mode = + rp.get_scheduler_mode(i); resource::background_work_function background_work = rp.get_background_work(i); @@ -653,7 +686,7 @@ namespace hpx { namespace threads { overall_background_work = [this, background_work]( std::size_t num_thread) -> bool { - bool result = background_work(num_thread); + bool 
const result = background_work(num_thread); return network_background_callback_(num_thread) || result; }; @@ -729,6 +762,11 @@ namespace hpx { namespace threads { thread_pool_init, thread_queue_init, numa_sensitive); break; + case resource::scheduling_policy::local_workrequesting_mc: + create_scheduler_local_workrequesting_mc( + thread_pool_init, thread_queue_init, numa_sensitive); + break; + case resource::scheduling_policy::abp_priority_fifo: create_scheduler_abp_priority_fifo( thread_pool_init, thread_queue_init, numa_sensitive); @@ -744,13 +782,10 @@ namespace hpx { namespace threads { thread_pool_init, thread_queue_init, numa_sensitive); break; - default: - [[fallthrough]]; case resource::scheduling_policy::unspecified: throw std::invalid_argument( "cannot instantiate a thread-manager if the thread-pool" + name + " has an unspecified scheduler type"); - break; } // update the thread_offset for the next pool @@ -758,9 +793,10 @@ namespace hpx { namespace threads { } // fill the thread-lookup table - for (auto& pool_iter : pools_) + for (auto const& pool_iter : pools_) { - std::size_t nt = rp.get_num_threads(pool_iter->get_pool_index()); + std::size_t const nt = + rp.get_num_threads(pool_iter->get_pool_index()); for (std::size_t i = 0; i < nt; i++) { threads_lookup_.emplace_back(pool_iter->get_pool_id()); @@ -770,22 +806,22 @@ namespace hpx { namespace threads { threadmanager::~threadmanager() = default; - void threadmanager::init() + void threadmanager::init() const { - auto& rp = hpx::resource::get_partitioner(); + auto const& rp = hpx::resource::get_partitioner(); std::size_t threads_offset = 0; // initialize all pools for (auto&& pool_iter : pools_) { - std::size_t num_threads_in_pool = + std::size_t const num_threads_in_pool = rp.get_num_threads(pool_iter->get_pool_index()); pool_iter->init(num_threads_in_pool, threads_offset); threads_offset += num_threads_in_pool; } } - void threadmanager::print_pools(std::ostream& os) + void 
threadmanager::print_pools(std::ostream& os) const { os << "The thread-manager owns " << pools_.size() //-V128 << " pool(s) : \n"; @@ -805,7 +841,7 @@ namespace hpx { namespace threads { thread_pool_base& threadmanager::get_pool( std::string const& pool_name) const { - // if the given pool_name is default, we don't need to look for it + // if the given pool_name is default, we don't need to look for it, // we must always return pool 0 if (pool_name == "default" || pool_name == resource::get_partitioner().get_default_pool_name()) @@ -814,7 +850,7 @@ namespace hpx { namespace threads { } // now check the other pools - no need to check pool 0 again, so ++begin - auto pool = std::find_if(++pools_.begin(), pools_.end(), + auto const pool = std::find_if(++pools_.begin(), pools_.end(), [&pool_name](pool_type const& itp) -> bool { return (itp->get_pool_name() == pool_name); }); @@ -843,7 +879,7 @@ namespace hpx { namespace threads { bool threadmanager::pool_exists(std::string const& pool_name) const { - // if the given pool_name is default, we don't need to look for it + // if the given pool_name is default, we don't need to look for it, // we must always return pool 0 if (pool_name == "default" || pool_name == resource::get_partitioner().get_default_pool_name()) @@ -852,7 +888,7 @@ namespace hpx { namespace threads { } // now check the other pools - no need to check pool 0 again, so ++begin - auto pool = std::find_if(++pools_.begin(), pools_.end(), + auto const pool = std::find_if(++pools_.begin(), pools_.end(), [&pool_name](pool_type const& itp) -> bool { return (itp->get_pool_name() == pool_name); }); @@ -872,12 +908,12 @@ namespace hpx { namespace threads { /////////////////////////////////////////////////////////////////////////// std::int64_t threadmanager::get_thread_count(thread_schedule_state state, - thread_priority priority, std::size_t num_thread, bool reset) + thread_priority priority, std::size_t num_thread, bool reset) const { std::int64_t total_count = 0; 
std::lock_guard lk(mtx_); - for (auto& pool_iter : pools_) + for (auto const& pool_iter : pools_) { total_count += pool_iter->get_thread_count(state, priority, num_thread, reset); @@ -886,12 +922,12 @@ namespace hpx { namespace threads { return total_count; } - std::int64_t threadmanager::get_idle_core_count() + std::int64_t threadmanager::get_idle_core_count() const { std::int64_t total_count = 0; std::lock_guard lk(mtx_); - for (auto& pool_iter : pools_) + for (auto const& pool_iter : pools_) { total_count += pool_iter->get_idle_core_count(); } @@ -899,14 +935,14 @@ namespace hpx { namespace threads { return total_count; } - mask_type threadmanager::get_idle_core_mask() + mask_type threadmanager::get_idle_core_mask() const { mask_type mask = mask_type(); resize(mask, hardware_concurrency()); std::lock_guard lk(mtx_); - for (auto& pool_iter : pools_) + for (auto const& pool_iter : pools_) { pool_iter->get_idle_core_mask(mask); } @@ -948,10 +984,10 @@ namespace hpx { namespace threads { // Abort all threads which are in suspended state. This will set // the state of all suspended threads to \a pending while // supplying the wait_abort extended state flag - void threadmanager::abort_all_suspended_threads() + void threadmanager::abort_all_suspended_threads() const { std::lock_guard lk(mtx_); - for (auto& pool_iter : pools_) + for (auto const& pool_iter : pools_) { pool_iter->abort_all_suspended_threads(); } @@ -962,12 +998,12 @@ namespace hpx { namespace threads { // have been terminated but which are still held in the queue // of terminated threads. Some schedulers might not do anything // here. 
- bool threadmanager::cleanup_terminated(bool delete_all) + bool threadmanager::cleanup_terminated(bool delete_all) const { std::lock_guard lk(mtx_); bool result = true; - for (auto& pool_iter : pools_) + for (auto const& pool_iter : pools_) { result = pool_iter->cleanup_terminated(delete_all) && result; } @@ -975,13 +1011,110 @@ namespace hpx { namespace threads { return result; } + std::size_t threadmanager::get_os_thread_count() const + { + std::lock_guard lk(mtx_); + std::size_t total = 0; + for (auto& pool_iter : pools_) + { + total += pool_iter->get_os_thread_count(); + } + return total; + } + + std::thread& threadmanager::get_os_thread_handle( + std::size_t num_thread) const + { + std::lock_guard lk(mtx_); + pool_id_type const id = threads_lookup_[num_thread]; + thread_pool_base& pool = get_pool(id); + return pool.get_os_thread_handle(num_thread); + } + + void threadmanager::report_error( + std::size_t num_thread, std::exception_ptr const& e) const + { + // propagate the error reporting to all pools, which in turn + // will propagate to schedulers + for (auto& pool_iter : pools_) + { + pool_iter->report_error(num_thread, e); + } + } + + mask_type threadmanager::get_used_processing_units() const + { + auto total_used_processing_punits = mask_type(); + threads::resize(total_used_processing_punits, + static_cast(hardware_concurrency())); + + for (auto& pool_iter : pools_) + { + total_used_processing_punits |= + pool_iter->get_used_processing_units(); + } + + return total_used_processing_punits; + } + + hwloc_bitmap_ptr threadmanager::get_pool_numa_bitmap( + std::string const& pool_name) const + { + return get_pool(pool_name).get_numa_domain_bitmap(); + } + + void threadmanager::set_scheduler_mode( + threads::policies::scheduler_mode mode) const noexcept + { + for (auto const& pool_iter : pools_) + { + pool_iter->get_scheduler()->set_scheduler_mode(mode); + } + } + + void threadmanager::add_scheduler_mode( + threads::policies::scheduler_mode mode) const noexcept 
+ { + for (auto const& pool_iter : pools_) + { + pool_iter->get_scheduler()->add_scheduler_mode(mode); + } + } + + void threadmanager::add_remove_scheduler_mode( + threads::policies::scheduler_mode to_add_mode, + threads::policies::scheduler_mode to_remove_mode) const noexcept + { + for (auto const& pool_iter : pools_) + { + pool_iter->get_scheduler()->add_remove_scheduler_mode( + to_add_mode, to_remove_mode); + } + } + + void threadmanager::remove_scheduler_mode( + threads::policies::scheduler_mode mode) const noexcept + { + for (auto const& pool_iter : pools_) + { + pool_iter->get_scheduler()->remove_scheduler_mode(mode); + } + } + + void threadmanager::reset_thread_distribution() const noexcept + { + for (auto const& pool_iter : pools_) + { + pool_iter->reset_thread_distribution(); + } + } + /////////////////////////////////////////////////////////////////////////// void threadmanager::register_thread( - thread_init_data& data, thread_id_ref_type& id, error_code& ec) + thread_init_data& data, thread_id_ref_type& id, error_code& ec) const { - thread_pool_base* pool = nullptr; - auto thrd_data = get_self_id_data(); - if (thrd_data) + thread_pool_base* pool; + if (auto const* thrd_data = get_self_id_data()) { pool = thrd_data->get_scheduler_base()->get_parent_pool(); } @@ -994,11 +1127,10 @@ namespace hpx { namespace threads { /////////////////////////////////////////////////////////////////////////// thread_id_ref_type threadmanager::register_work( - thread_init_data& data, error_code& ec) + thread_init_data& data, error_code& ec) const { - thread_pool_base* pool = nullptr; - auto thrd_data = get_self_id_data(); - if (thrd_data) + thread_pool_base* pool; + if (auto const* thrd_data = get_self_id_data()) { pool = thrd_data->get_scheduler_base()->get_parent_pool(); } @@ -1009,10 +1141,20 @@ namespace hpx { namespace threads { return pool->create_work(data, ec); } + void threadmanager::init_tss(std::size_t global_thread_num) + { + 
detail::set_global_thread_num_tss(global_thread_num); + } + + void threadmanager::deinit_tss() + { + detail::set_global_thread_num_tss(static_cast(-1)); + } + /////////////////////////////////////////////////////////////////////////// - constexpr std::size_t all_threads = std::size_t(-1); + inline constexpr std::size_t all_threads = static_cast(-1); - std::int64_t threadmanager::get_queue_length(bool reset) + std::int64_t threadmanager::get_queue_length(bool reset) const { std::int64_t result = 0; for (auto const& pool_iter : pools_) @@ -1039,7 +1181,7 @@ namespace hpx { namespace threads { } #endif - std::int64_t threadmanager::get_cumulative_duration(bool reset) + std::int64_t threadmanager::get_cumulative_duration(bool reset) const { std::int64_t result = 0; for (auto const& pool_iter : pools_) @@ -1047,9 +1189,45 @@ namespace hpx { namespace threads { return result; } + std::int64_t threadmanager::get_thread_count_unknown(bool reset) const + { + return get_thread_count(thread_schedule_state::unknown, + thread_priority::default_, static_cast(-1), reset); + } + + std::int64_t threadmanager::get_thread_count_active(bool reset) const + { + return get_thread_count(thread_schedule_state::active, + thread_priority::default_, static_cast(-1), reset); + } + + std::int64_t threadmanager::get_thread_count_pending(bool reset) const + { + return get_thread_count(thread_schedule_state::pending, + thread_priority::default_, static_cast(-1), reset); + } + + std::int64_t threadmanager::get_thread_count_suspended(bool reset) const + { + return get_thread_count(thread_schedule_state::suspended, + thread_priority::default_, static_cast(-1), reset); + } + + std::int64_t threadmanager::get_thread_count_terminated(bool reset) const + { + return get_thread_count(thread_schedule_state::terminated, + thread_priority::default_, static_cast(-1), reset); + } + + std::int64_t threadmanager::get_thread_count_staged(bool reset) const + { + return 
get_thread_count(thread_schedule_state::staged, + thread_priority::default_, static_cast(-1), reset); + } + #if defined(HPX_HAVE_BACKGROUND_THREAD_COUNTERS) && \ defined(HPX_HAVE_THREAD_IDLE_RATES) - std::int64_t threadmanager::get_background_work_duration(bool reset) + std::int64_t threadmanager::get_background_work_duration(bool reset) const { std::int64_t result = 0; for (auto const& pool_iter : pools_) @@ -1058,7 +1236,7 @@ namespace hpx { namespace threads { return result; } - std::int64_t threadmanager::get_background_overhead(bool reset) + std::int64_t threadmanager::get_background_overhead(bool reset) const { std::int64_t result = 0; for (auto const& pool_iter : pools_) @@ -1066,7 +1244,7 @@ namespace hpx { namespace threads { return result; } - std::int64_t threadmanager::get_background_send_duration(bool reset) + std::int64_t threadmanager::get_background_send_duration(bool reset) const { std::int64_t result = 0; for (auto const& pool_iter : pools_) @@ -1075,7 +1253,7 @@ namespace hpx { namespace threads { return result; } - std::int64_t threadmanager::get_background_send_overhead(bool reset) + std::int64_t threadmanager::get_background_send_overhead(bool reset) const { std::int64_t result = 0; for (auto const& pool_iter : pools_) @@ -1084,7 +1262,8 @@ namespace hpx { namespace threads { return result; } - std::int64_t threadmanager::get_background_receive_duration(bool reset) + std::int64_t threadmanager::get_background_receive_duration( + bool reset) const { std::int64_t result = 0; for (auto const& pool_iter : pools_) @@ -1093,7 +1272,8 @@ namespace hpx { namespace threads { return result; } - std::int64_t threadmanager::get_background_receive_overhead(bool reset) + std::int64_t threadmanager::get_background_receive_overhead( + bool reset) const { std::int64_t result = 0; for (auto const& pool_iter : pools_) @@ -1104,7 +1284,7 @@ namespace hpx { namespace threads { #endif // HPX_HAVE_BACKGROUND_THREAD_COUNTERS #ifdef HPX_HAVE_THREAD_IDLE_RATES - 
std::int64_t threadmanager::avg_idle_rate(bool reset) noexcept + std::int64_t threadmanager::avg_idle_rate(bool reset) const noexcept { std::int64_t result = 0; for (auto const& pool_iter : pools_) @@ -1113,7 +1293,8 @@ namespace hpx { namespace threads { } #ifdef HPX_HAVE_THREAD_CREATION_AND_CLEANUP_RATES - std::int64_t threadmanager::avg_creation_idle_rate(bool reset) noexcept + std::int64_t threadmanager::avg_creation_idle_rate( + bool reset) const noexcept { std::int64_t result = 0; for (auto const& pool_iter : pools_) @@ -1121,7 +1302,7 @@ namespace hpx { namespace threads { return result; } - std::int64_t threadmanager::avg_cleanup_idle_rate(bool reset) noexcept + std::int64_t threadmanager::avg_cleanup_idle_rate(bool reset) const noexcept { std::int64_t result = 0; for (auto const& pool_iter : pools_) @@ -1132,7 +1313,7 @@ namespace hpx { namespace threads { #endif #ifdef HPX_HAVE_THREAD_CUMULATIVE_COUNTS - std::int64_t threadmanager::get_executed_threads(bool reset) noexcept + std::int64_t threadmanager::get_executed_threads(bool reset) const noexcept { std::int64_t result = 0; for (auto const& pool_iter : pools_) @@ -1140,7 +1321,8 @@ namespace hpx { namespace threads { return result; } - std::int64_t threadmanager::get_executed_thread_phases(bool reset) noexcept + std::int64_t threadmanager::get_executed_thread_phases( + bool reset) const noexcept { std::int64_t result = 0; for (auto const& pool_iter : pools_) @@ -1149,7 +1331,7 @@ namespace hpx { namespace threads { } #ifdef HPX_HAVE_THREAD_IDLE_RATES - std::int64_t threadmanager::get_thread_duration(bool reset) + std::int64_t threadmanager::get_thread_duration(bool reset) const { std::int64_t result = 0; for (auto const& pool_iter : pools_) @@ -1157,7 +1339,7 @@ namespace hpx { namespace threads { return result; } - std::int64_t threadmanager::get_thread_phase_duration(bool reset) + std::int64_t threadmanager::get_thread_phase_duration(bool reset) const { std::int64_t result = 0; for (auto const& 
pool_iter : pools_) @@ -1165,7 +1347,7 @@ namespace hpx { namespace threads { return result; } - std::int64_t threadmanager::get_thread_overhead(bool reset) + std::int64_t threadmanager::get_thread_overhead(bool reset) const { std::int64_t result = 0; for (auto const& pool_iter : pools_) @@ -1173,7 +1355,7 @@ namespace hpx { namespace threads { return result; } - std::int64_t threadmanager::get_thread_phase_overhead(bool reset) + std::int64_t threadmanager::get_thread_phase_overhead(bool reset) const { std::int64_t result = 0; for (auto const& pool_iter : pools_) @@ -1181,7 +1363,7 @@ namespace hpx { namespace threads { return result; } - std::int64_t threadmanager::get_cumulative_thread_duration(bool reset) + std::int64_t threadmanager::get_cumulative_thread_duration(bool reset) const { std::int64_t result = 0; for (auto const& pool_iter : pools_) @@ -1190,7 +1372,7 @@ namespace hpx { namespace threads { return result; } - std::int64_t threadmanager::get_cumulative_thread_overhead(bool reset) + std::int64_t threadmanager::get_cumulative_thread_overhead(bool reset) const { std::int64_t result = 0; for (auto const& pool_iter : pools_) @@ -1202,7 +1384,7 @@ namespace hpx { namespace threads { #endif #ifdef HPX_HAVE_THREAD_STEALING_COUNTS - std::int64_t threadmanager::get_num_pending_misses(bool reset) + std::int64_t threadmanager::get_num_pending_misses(bool reset) const { std::int64_t result = 0; for (auto const& pool_iter : pools_) @@ -1210,7 +1392,7 @@ namespace hpx { namespace threads { return result; } - std::int64_t threadmanager::get_num_pending_accesses(bool reset) + std::int64_t threadmanager::get_num_pending_accesses(bool reset) const { std::int64_t result = 0; for (auto const& pool_iter : pools_) @@ -1218,7 +1400,7 @@ namespace hpx { namespace threads { return result; } - std::int64_t threadmanager::get_num_stolen_from_pending(bool reset) + std::int64_t threadmanager::get_num_stolen_from_pending(bool reset) const { std::int64_t result = 0; for (auto const& 
pool_iter : pools_) @@ -1227,7 +1409,7 @@ namespace hpx { namespace threads { return result; } - std::int64_t threadmanager::get_num_stolen_from_staged(bool reset) + std::int64_t threadmanager::get_num_stolen_from_staged(bool reset) const { std::int64_t result = 0; for (auto const& pool_iter : pools_) @@ -1235,7 +1417,7 @@ namespace hpx { namespace threads { return result; } - std::int64_t threadmanager::get_num_stolen_to_pending(bool reset) + std::int64_t threadmanager::get_num_stolen_to_pending(bool reset) const { std::int64_t result = 0; for (auto const& pool_iter : pools_) @@ -1243,7 +1425,7 @@ namespace hpx { namespace threads { return result; } - std::int64_t threadmanager::get_num_stolen_to_staged(bool reset) + std::int64_t threadmanager::get_num_stolen_to_staged(bool reset) const { std::int64_t result = 0; for (auto const& pool_iter : pools_) @@ -1253,13 +1435,13 @@ namespace hpx { namespace threads { #endif /////////////////////////////////////////////////////////////////////////// - bool threadmanager::run() + bool threadmanager::run() const { std::unique_lock lk(mtx_); - // the main thread needs to have a unique thread_num - // worker threads are numbered 0..N-1, so we can use N for this thread - auto& rp = hpx::resource::get_partitioner(); + // the main thread needs to have a unique thread_num worker threads are + // numbered 0 to N-1, so we can use N for this thread + auto const& rp = hpx::resource::get_partitioner(); init_tss(rp.get_num_threads()); #ifdef HPX_HAVE_TIMER_POOL @@ -1267,9 +1449,9 @@ namespace hpx { namespace threads { timer_pool_.run(false); #endif - for (auto& pool_iter : pools_) + for (auto const& pool_iter : pools_) { - std::size_t num_threads_in_pool = + std::size_t const num_threads_in_pool = rp.get_num_threads(pool_iter->get_pool_name()); if (pool_iter->get_os_thread_count() != 0 || @@ -1287,8 +1469,7 @@ namespace hpx { namespace threads { } // set all states of all schedulers to "running" - policies::scheduler_base* sched = 
pool_iter->get_scheduler(); - if (sched) + if (policies::scheduler_base* sched = pool_iter->get_scheduler()) sched->set_all_states(hpx::state::running); } @@ -1296,39 +1477,39 @@ namespace hpx { namespace threads { return true; } - void threadmanager::stop(bool blocking) + void threadmanager::stop(bool blocking) const { LTM_(info).format("stop: blocking({})", blocking ? "true" : "false"); std::unique_lock lk(mtx_); - for (auto& pool_iter : pools_) + for (auto const& pool_iter : pools_) { pool_iter->stop(lk, blocking); } deinit_tss(); } - bool threadmanager::is_busy() + bool threadmanager::is_busy() const { bool busy = false; - for (auto& pool_iter : pools_) + for (auto const& pool_iter : pools_) { busy = busy || pool_iter->is_busy(); } return busy; } - bool threadmanager::is_idle() + bool threadmanager::is_idle() const { bool idle = true; - for (auto& pool_iter : pools_) + for (auto const& pool_iter : pools_) { idle = idle && pool_iter->is_idle(); } return idle; } - void threadmanager::wait() + void threadmanager::wait() const { auto const shutdown_check_count = util::get_entry_as( rtcfg_, "hpx.shutdown_check_count", 10); @@ -1336,7 +1517,8 @@ namespace hpx { namespace threads { [this]() { return is_busy(); }, shutdown_check_count); } - bool threadmanager::wait_for(hpx::chrono::steady_duration const& rel_time) + bool threadmanager::wait_for( + hpx::chrono::steady_duration const& rel_time) const { auto const shutdown_check_count = util::get_entry_as( rtcfg_, "hpx.shutdown_check_count", 10); @@ -1344,7 +1526,7 @@ namespace hpx { namespace threads { [this]() { return is_busy(); }, shutdown_check_count, rel_time); } - void threadmanager::suspend() + void threadmanager::suspend() const { wait(); @@ -1361,14 +1543,14 @@ namespace hpx { namespace threads { } else { - for (auto& pool_iter : pools_) + for (auto const& pool_iter : pools_) { pool_iter->suspend_direct(); } } } - void threadmanager::resume() + void threadmanager::resume() const { if (threads::get_self_ptr()) { 
@@ -1382,10 +1564,23 @@ namespace hpx { namespace threads { } else { - for (auto& pool_iter : pools_) + for (auto const& pool_iter : pools_) { pool_iter->resume_direct(); } } } -}} // namespace hpx::threads + + hpx::state threadmanager::status() const + { + hpx::state result(hpx::state::last_valid_runtime_state); + + for (auto& pool_iter : pools_) + { + hpx::state s = pool_iter->get_state(); + result = (std::min)(result, s); + } + + return result; + } +} // namespace hpx::threads diff --git a/libs/core/topology/include/hpx/topology/cpu_mask.hpp b/libs/core/topology/include/hpx/topology/cpu_mask.hpp index 5e9f7a68c5a0..75e897b5fd32 100644 --- a/libs/core/topology/include/hpx/topology/cpu_mask.hpp +++ b/libs/core/topology/include/hpx/topology/cpu_mask.hpp @@ -1,5 +1,5 @@ //////////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2007-2015 Hartmut Kaiser +// Copyright (c) 2007-2024 Hartmut Kaiser // Copyright (c) 2008-2009 Chirag Dekate, Anshul Tandon // Copyright (c) 2012-2013 Thomas Heller // @@ -43,52 +43,52 @@ namespace hpx::threads { using mask_rvref_type = std::uint64_t; using mask_cref_type = std::uint64_t; - constexpr inline std::uint64_t bits(std::size_t idx) noexcept + constexpr std::uint64_t bits(std::size_t idx) noexcept { HPX_ASSERT(idx < CHAR_BIT * sizeof(mask_type)); - return std::uint64_t(1) << idx; + return static_cast(1) << idx; } - constexpr inline bool any(mask_cref_type mask) noexcept + constexpr bool any(mask_cref_type mask) noexcept { return mask != 0; } - constexpr inline mask_type not_(mask_cref_type mask) noexcept + constexpr mask_type not_(mask_cref_type mask) noexcept { return ~mask; } - constexpr inline bool test(mask_cref_type mask, std::size_t idx) noexcept + constexpr bool test(mask_cref_type mask, std::size_t idx) noexcept { HPX_ASSERT(idx < CHAR_BIT * sizeof(mask_type)); return (bits(idx) & mask) != 0; } - constexpr inline void set(mask_type& mask, std::size_t idx) noexcept + constexpr void 
set(mask_type& mask, std::size_t idx) noexcept { HPX_ASSERT(idx < CHAR_BIT * sizeof(mask_type)); mask |= bits(idx); } - constexpr inline void unset(mask_type& mask, std::size_t idx) noexcept + constexpr void unset(mask_type& mask, std::size_t idx) noexcept { HPX_ASSERT(idx < CHAR_BIT * sizeof(mask_type)); mask &= not_(bits(idx)); } - constexpr inline std::size_t mask_size(mask_cref_type /*mask*/) noexcept + constexpr std::size_t mask_size(mask_cref_type /*mask*/) noexcept { return CHAR_BIT * sizeof(mask_type); } - constexpr inline void resize( + constexpr void resize( mask_type& /*mask*/, [[maybe_unused]] std::size_t s) noexcept { HPX_ASSERT(s <= CHAR_BIT * sizeof(mask_type)); } - constexpr inline std::size_t find_first(mask_cref_type mask) noexcept + constexpr std::size_t find_first(mask_cref_type mask) noexcept { if (mask) { @@ -101,24 +101,24 @@ namespace hpx::threads { return c; } - return ~std::size_t(0); + return ~static_cast(0); } - constexpr inline bool equal( + constexpr bool equal( mask_cref_type lhs, mask_cref_type rhs, std::size_t = 0) noexcept { return lhs == rhs; } // return true if at least one of the masks has a bit set - constexpr inline bool bit_or( + constexpr bool bit_or( mask_cref_type lhs, mask_cref_type rhs, std::size_t = 0) noexcept { return (lhs | rhs) != 0; } // return true if at least one bit is set in both masks - constexpr inline bool bit_and( + constexpr bool bit_and( mask_cref_type lhs, mask_cref_type rhs, std::size_t = 0) noexcept { return (lhs & rhs) != 0; @@ -126,7 +126,7 @@ namespace hpx::threads { // returns the number of bits set, taken from: // https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetKernighan - constexpr inline std::size_t count(mask_type mask) noexcept + constexpr std::size_t count(mask_type mask) noexcept { std::size_t c = 0; // c accumulates the total bits set in v for (; mask; ++c) @@ -136,7 +136,7 @@ namespace hpx::threads { return c; } - constexpr inline void reset(mask_type& mask) noexcept + 
constexpr void reset(mask_type& mask) noexcept { mask = 0ull; } diff --git a/libs/core/topology/include/hpx/topology/topology.hpp b/libs/core/topology/include/hpx/topology/topology.hpp index b88ea2ae96da..3842afec0861 100644 --- a/libs/core/topology/include/hpx/topology/topology.hpp +++ b/libs/core/topology/include/hpx/topology/topology.hpp @@ -416,7 +416,7 @@ namespace hpx::threads { // For example, core_affinity_masks[0] is a bitmask, where the // elements = 1 indicate the PUs that belong to the core on which // PU #0 (zero-based index) lies. - mask_type machine_affinity_mask_{}; + mask_type machine_affinity_mask_ = mask_type(); std::vector socket_affinity_masks_; std::vector numa_node_affinity_masks_; std::vector core_affinity_masks_; diff --git a/libs/full/performance_counters/src/threadmanager_counter_types.cpp b/libs/full/performance_counters/src/threadmanager_counter_types.cpp index 84132c01b1cf..a71ef6488ddc 100644 --- a/libs/full/performance_counters/src/threadmanager_counter_types.cpp +++ b/libs/full/performance_counters/src/threadmanager_counter_types.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2007-2022 Hartmut Kaiser +// Copyright (c) 2007-2024 Hartmut Kaiser // Copyright (c) 2011 Bryce Lelbach, Katelyn Kufahl // Copyright (c) 2008-2009 Chirag Dekate, Anshul Tandon // Copyright (c) 2015 Patricia Grubel @@ -19,7 +19,9 @@ #include #include #include +#ifdef HPX_HAVE_THREAD_QUEUE_WAITTIME #include +#endif #include #include @@ -29,7 +31,7 @@ namespace hpx::performance_counters::detail { using threadmanager_counter_func = std::int64_t (threads::threadmanager::*)( - bool reset); + bool reset) const; using threadpool_counter_func = std::int64_t (threads::thread_pool_base::*)( std::size_t num_thread, bool reset); @@ -89,7 +91,7 @@ namespace hpx::performance_counters::detail { else if (paths.instancename_ == "pool") { if (paths.instanceindex_ >= 0 && - std::size_t(paths.instanceindex_) < + static_cast(paths.instanceindex_) < hpx::resource::get_num_thread_pools()) { // 
specific for given pool counter @@ -105,7 +107,8 @@ namespace hpx::performance_counters::detail { } else if (paths.instancename_ == "worker-thread" && paths.instanceindex_ >= 0 && - std::size_t(paths.instanceindex_) < pool.get_os_thread_count()) + static_cast(paths.instanceindex_) < + pool.get_os_thread_count()) { // specific counter from default using detail::create_raw_counter; @@ -122,7 +125,8 @@ namespace hpx::performance_counters::detail { // scheduler utilization counter creation function naming::gid_type scheduler_utilization_counter_creator( - threads::threadmanager* tm, counter_info const& info, error_code& ec) + threads::threadmanager const* tm, counter_info const& info, + error_code& ec) { // verify the validity of the counter instance name counter_path_elements paths; @@ -162,7 +166,7 @@ namespace hpx::performance_counters::detail { &pool); return create_raw_counter(info, HPX_MOVE(f), ec); } - else if (std::size_t(paths.instanceindex_) < + else if (static_cast(paths.instanceindex_) < hpx::resource::get_num_thread_pools()) { // counter specific for given pool @@ -187,7 +191,7 @@ namespace hpx::performance_counters::detail { // /threads{locality#%d/worker-thread#%d}/idle-loop-count/instantaneous // /threads{locality#%d/pool#%s/worker-thread#%d}/idle-loop-count/instantaneous naming::gid_type locality_pool_thread_no_total_counter_creator( - threads::threadmanager* tm, threadpool_counter_func pool_func, + threads::threadmanager const* tm, threadpool_counter_func pool_func, counter_info const& info, error_code& ec) { // verify the validity of the counter instance name @@ -218,7 +222,7 @@ namespace hpx::performance_counters::detail { else if (paths.instancename_ == "pool") { if (paths.instanceindex_ >= 0 && - std::size_t(paths.instanceindex_) < + static_cast(paths.instanceindex_) < hpx::resource::get_num_thread_pools()) { // specific for given pool counter @@ -234,7 +238,8 @@ namespace hpx::performance_counters::detail { } else if (paths.instancename_ == 
"worker-thread" && paths.instanceindex_ >= 0 && - std::size_t(paths.instanceindex_) < pool.get_os_thread_count()) + static_cast(paths.instanceindex_) < + pool.get_os_thread_count()) { // specific counter using detail::create_raw_counter; @@ -275,7 +280,7 @@ namespace hpx::performance_counters::detail { else if (!individual_creator.empty() && paths.instancename_ == individual_name && paths.instanceindex_ >= 0 && - std::size_t(paths.instanceindex_) < individual_count) + static_cast(paths.instanceindex_) < individual_count) { // specific counter using detail::create_raw_counter; @@ -354,7 +359,7 @@ namespace hpx::performance_counters { hpx::bind_front(&detail::thread_counts_counter_creator)); #endif - generic_counter_type_data counter_types[] = { + generic_counter_type_data const counter_types[] = { // length of thread queue(s) {"/threadqueue/length", counter_type::raw, "returns the current queue length for the referenced queue", diff --git a/tests/performance/local/future_overhead.cpp b/tests/performance/local/future_overhead.cpp index fbb443c70efa..26d417b8a3aa 100644 --- a/tests/performance/local/future_overhead.cpp +++ b/tests/performance/local/future_overhead.cpp @@ -50,11 +50,11 @@ static std::uint64_t num_threads = 1; static std::string info_string = ""; /////////////////////////////////////////////////////////////////////////////// -void print_stats(const char* title, const char* wait, const char* exec, +void print_stats(char const* title, char const* wait, char const* exec, std::int64_t count, double duration, bool csv) { std::ostringstream temp; - double us = 1e6 * duration / count; + double const us = 1e6 * duration / count; if (csv) { hpx::util::format_to(temp, @@ -76,12 +76,12 @@ void print_stats(const char* title, const char* wait, const char* exec, //hpx::util::print_cdash_timing(title, duration); } -const char* exec_name(hpx::execution::parallel_executor const&) +char const* exec_name(hpx::execution::parallel_executor const&) { return 
"parallel_executor"; } -const char* exec_name(hpx::execution::experimental::scheduler_executor< +char const* exec_name(hpx::execution::experimental::scheduler_executor< hpx::execution::experimental::thread_pool_scheduler> const&) { return "scheduler_executor"; @@ -89,15 +89,15 @@ const char* exec_name(hpx::execution::experimental::scheduler_executor< /////////////////////////////////////////////////////////////////////////////// // we use globals here to prevent the delay from being optimized away -double global_scratch = 0; -std::uint64_t num_iterations = 0; +double volatile global_scratch = 0; +std::uint64_t volatile num_iterations = 0; /////////////////////////////////////////////////////////////////////////////// double null_function() noexcept { if (num_iterations > 0) { - const int array_size = 4096; + constexpr int array_size = 4096; std::array dummy; for (std::uint64_t i = 0; i < num_iterations; ++i) { @@ -106,6 +106,7 @@ double null_function() noexcept dummy[j] = 1.0 / (2.0 * i * j + 1.0); } } + global_scratch = dummy[0]; return dummy[0]; } return 0.0; @@ -115,7 +116,7 @@ struct scratcher { void operator()(future r) const { - global_scratch += r.get(); + global_scratch = global_scratch + r.get(); } }; @@ -125,36 +126,36 @@ HPX_PLAIN_ACTION(null_function, null_action) // Time async action execution using wait each on futures vector void measure_action_futures_wait_each(std::uint64_t count, bool csv) { - const hpx::id_type here = hpx::find_here(); + hpx::id_type const here = hpx::find_here(); std::vector> futures; futures.reserve(count); // start the clock - high_resolution_timer walltime; + high_resolution_timer const walltime; for (std::uint64_t i = 0; i < count; ++i) futures.push_back(async(here)); hpx::wait_each(scratcher(), futures); // stop the clock - const double duration = walltime.elapsed(); + double const duration = walltime.elapsed(); print_stats("action", "WaitEach", "no-executor", count, duration, csv); } // Time async action execution using 
wait each on futures vector void measure_action_futures_wait_all(std::uint64_t count, bool csv) { - const hpx::id_type here = hpx::find_here(); + hpx::id_type const here = hpx::find_here(); std::vector> futures; futures.reserve(count); // start the clock - high_resolution_timer walltime; + high_resolution_timer const walltime; for (std::uint64_t i = 0; i < count; ++i) futures.push_back(async(here)); hpx::wait_all(futures); // stop the clock - const double duration = walltime.elapsed(); + double const duration = walltime.elapsed(); print_stats("action", "WaitAll", "no-executor", count, duration, csv); } #endif @@ -168,13 +169,13 @@ void measure_function_futures_wait_each( futures.reserve(count); // start the clock - high_resolution_timer walltime; + high_resolution_timer const walltime; for (std::uint64_t i = 0; i < count; ++i) futures.push_back(async(exec, &null_function)); hpx::wait_each(scratcher(), futures); // stop the clock - const double duration = walltime.elapsed(); + double const duration = walltime.elapsed(); print_stats("async", "WaitEach", exec_name(exec), count, duration, csv); } @@ -186,12 +187,12 @@ void measure_function_futures_wait_all( futures.reserve(count); // start the clock - high_resolution_timer walltime; + high_resolution_timer const walltime; for (std::uint64_t i = 0; i < count; ++i) futures.push_back(async(exec, &null_function)); hpx::wait_all(futures); - const double duration = walltime.elapsed(); + double const duration = walltime.elapsed(); print_stats("async", "WaitAll", exec_name(exec), count, duration, csv); } @@ -226,7 +227,7 @@ void measure_function_futures_limiting_executor( hpx::execution::experimental::static_chunk_size fixed(chunk_size); // start the clock - high_resolution_timer walltime; + high_resolution_timer const walltime; { hpx::execution::experimental::limiting_executor signal_exec( exec, tasks, tasks + 1000); @@ -234,7 +235,7 @@ void measure_function_futures_limiting_executor( hpx::execution::par.with(fixed), 0, 
count, [&](std::uint64_t) { hpx::post(signal_exec, [&]() { null_function(); - sanity_check--; + --sanity_check; }); }); } @@ -246,7 +247,7 @@ void measure_function_futures_limiting_executor( } // stop the clock - const double duration = walltime.elapsed(); + double const duration = walltime.elapsed(); print_stats( "apply", "limiting-Exec", exec_name(exec), count, duration, csv); } @@ -256,8 +257,8 @@ void measure_function_futures_sliding_semaphore( std::uint64_t count, bool csv, Executor& exec) { // start the clock - high_resolution_timer walltime; - const int sem_count = 5000; + high_resolution_timer const walltime; + constexpr int sem_count = 5000; auto sem = std::make_shared(sem_count); for (std::uint64_t i = 0; i < count; ++i) { @@ -270,7 +271,7 @@ void measure_function_futures_sliding_semaphore( sem->wait(count + sem_count - 1); // stop the clock - const double duration = walltime.elapsed(); + double const duration = walltime.elapsed(); print_stats("apply", "Sliding-Sem", exec_name(exec), count, duration, csv); } @@ -286,20 +287,18 @@ struct unlimited_number_of_chunks } }; -namespace hpx::parallel::execution { - - template <> - struct is_executor_parameters : std::true_type - { - }; -} // namespace hpx::parallel::execution +template <> +struct hpx::parallel::execution::is_executor_parameters< + unlimited_number_of_chunks> : std::true_type +{ +}; template void measure_function_futures_for_loop(std::uint64_t count, bool csv, Executor& exec, char const* executor_name = nullptr) { // start the clock - high_resolution_timer walltime; + high_resolution_timer const walltime; hpx::experimental::for_loop( hpx::execution::par.on(exec).with( hpx::execution::experimental::static_chunk_size(1), @@ -307,7 +306,7 @@ void measure_function_futures_for_loop(std::uint64_t count, bool csv, 0, count, [](std::uint64_t) { null_function(); }); // stop the clock - const double duration = walltime.elapsed(); + double const duration = walltime.elapsed(); print_stats("for_loop", "par", 
executor_name ? executor_name : exec_name(exec), count, duration, csv); } @@ -317,7 +316,7 @@ void measure_function_futures_register_work(std::uint64_t count, bool csv) hpx::latch l(count); // start the clock - high_resolution_timer walltime; + high_resolution_timer const walltime; for (std::uint64_t i = 0; i < count; ++i) { hpx::threads::thread_init_data data( @@ -331,7 +330,7 @@ void measure_function_futures_register_work(std::uint64_t count, bool csv) l.wait(); // stop the clock - const double duration = walltime.elapsed(); + double const duration = walltime.elapsed(); print_stats("register_work", "latch", "none", count, duration, csv); } @@ -346,14 +345,14 @@ void measure_function_futures_create_thread(std::uint64_t count, bool csv) }; auto const thread_func = hpx::threads::detail::thread_function_nullary{func}; - auto const desc = hpx::threads::thread_description(); - auto const prio = hpx::threads::thread_priority::normal; - auto const hint = hpx::threads::thread_schedule_hint(); - auto const stack_size = hpx::threads::thread_stacksize::small_; + constexpr auto desc = hpx::threads::thread_description(); + constexpr auto prio = hpx::threads::thread_priority::normal; + constexpr auto hint = hpx::threads::thread_schedule_hint(); + constexpr auto stack_size = hpx::threads::thread_stacksize::small_; hpx::error_code ec; // start the clock - high_resolution_timer walltime; + high_resolution_timer const walltime; for (std::uint64_t i = 0; i < count; ++i) { auto init = hpx::threads::thread_init_data( @@ -365,7 +364,7 @@ void measure_function_futures_create_thread(std::uint64_t count, bool csv) l.wait(); // stop the clock - const double duration = walltime.elapsed(); + double const duration = walltime.elapsed(); print_stats("create_thread", "latch", "none", count, duration, csv); } @@ -402,7 +401,7 @@ void measure_function_futures_create_thread_hierarchical_placement( hpx::error_code ec; // start the clock - high_resolution_timer walltime; + high_resolution_timer const 
walltime; for (std::size_t t = 0; t < num_threads; ++t) { auto const hint = @@ -434,7 +433,7 @@ void measure_function_futures_create_thread_hierarchical_placement( l.wait(); // stop the clock - const double duration = walltime.elapsed(); + double const duration = walltime.elapsed(); print_stats( "create_thread_hierarchical", "latch", "none", count, duration, csv); } @@ -451,7 +450,7 @@ void measure_function_futures_apply_hierarchical_placement( auto const num_threads = hpx::get_num_worker_threads(); // start the clock - high_resolution_timer walltime; + high_resolution_timer const walltime; for (std::size_t t = 0; t < num_threads; ++t) { auto const hint = @@ -473,7 +472,7 @@ void measure_function_futures_apply_hierarchical_placement( l.wait(); // stop the clock - const double duration = walltime.elapsed(); + double const duration = walltime.elapsed(); print_stats("apply_hierarchical", "latch", "parallel_executor", count, duration, csv); } @@ -490,8 +489,8 @@ int hpx_main(variables_map& vm) else numa_sensitive = 0; - bool test_all = (vm.count("test-all") > 0); - const int repetitions = vm["repetitions"].as(); + bool const test_all = (vm.count("test-all") > 0); + int const repetitions = vm["repetitions"].as(); if (vm.count("info")) info_string = vm["info"].as(); @@ -500,8 +499,8 @@ int hpx_main(variables_map& vm) num_iterations = vm["delay-iterations"].as(); - const std::uint64_t count = vm["futures"].as(); - bool csv = vm.count("csv") != 0; + std::uint64_t const count = vm["futures"].as(); + bool const csv = vm.count("csv") != 0; if (HPX_UNLIKELY(0 == count)) throw std::logic_error("error: count of 0 futures specified\n");