From 56075d18fa3b38a02f7f5dcdf08eb9ea5fd87acc Mon Sep 17 00:00:00 2001 From: Hartmut Kaiser Date: Wed, 17 Apr 2024 09:08:29 -0500 Subject: [PATCH] Remove remnants of libfabric parcelport --- libs/full/CMakeLists.txt | 1 - libs/full/modules.rst | 1 - libs/full/parcelport_libfabric/CMakeLists.txt | 67 - .../parcelport_libfabric/cmake/FindPMI.cmake | 73 - .../cmake/HPX_SetupLibfabric.cmake | 337 ----- libs/full/parcelport_libfabric/docs/index.rst | 18 - .../examples/CMakeLists.txt | 18 - .../connection_handler.hpp | 94 -- .../hpx/parcelport_libfabric/fabric_error.hpp | 61 - .../hpx/parcelport_libfabric/header.hpp | 440 ------ .../libfabric_controller.hpp | 1342 ----------------- .../libfabric_region_provider.hpp | 47 - .../hpx/parcelport_libfabric/locality.hpp | 213 --- .../parcelport_libfabric.hpp | 281 ---- .../parcelport_logging.hpp | 287 ---- .../performance_counter.hpp | 161 -- .../pinned_memory_vector.hpp | 238 --- .../hpx/parcelport_libfabric/rdma_locks.hpp | 58 - .../readers_writers_mutex.hpp | 318 ---- .../hpx/parcelport_libfabric/receiver.hpp | 97 -- .../hpx/parcelport_libfabric/rma_base.hpp | 26 - .../parcelport_libfabric/rma_memory_pool.hpp | 504 ------- .../rma_memory_region.hpp | 324 ---- .../rma_memory_region_traits.hpp | 40 - .../hpx/parcelport_libfabric/rma_receiver.hpp | 127 -- .../hpx/parcelport_libfabric/sender.hpp | 147 -- .../parcelport_libfabric/unordered_map.hpp | 386 ----- .../src/parcelport_libfabric.cpp | 433 ------ .../parcelport_libfabric/src/receiver.cpp | 201 --- .../parcelport_libfabric/src/rma_receiver.cpp | 576 ------- libs/full/parcelport_libfabric/src/sender.cpp | 335 ---- .../parcelport_libfabric/tests/CMakeLists.txt | 42 - .../tests/performance/CMakeLists.txt | 5 - .../tests/regressions/CMakeLists.txt | 5 - .../tests/unit/CMakeLists.txt | 5 - .../tests/unit/thread_mapper_parcel_pools.cpp | 2 +- .../slurm-network-storage.sh.in | 8 +- 37 files changed, 3 insertions(+), 7315 deletions(-) delete mode 100644 libs/full/parcelport_libfabric/CMakeLists.txt delete mode 100644 libs/full/parcelport_libfabric/cmake/FindPMI.cmake delete mode 100644 libs/full/parcelport_libfabric/cmake/HPX_SetupLibfabric.cmake delete mode 100644 libs/full/parcelport_libfabric/docs/index.rst delete mode 100644 libs/full/parcelport_libfabric/examples/CMakeLists.txt delete mode 100644 libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/connection_handler.hpp delete mode 100644 libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/fabric_error.hpp delete mode 100644 libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/header.hpp delete mode 100644 libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/libfabric_controller.hpp delete mode 100644 libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/libfabric_region_provider.hpp delete mode 100644 libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/locality.hpp delete mode 100644 libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/parcelport_libfabric.hpp delete mode 100644 libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/parcelport_logging.hpp delete mode 100644 libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/performance_counter.hpp delete mode 100644 libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/pinned_memory_vector.hpp delete mode 100644 libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/rdma_locks.hpp delete mode 100644 libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/readers_writers_mutex.hpp delete mode 100644 libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/receiver.hpp delete mode 100644 libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/rma_base.hpp delete mode 100644 libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/rma_memory_pool.hpp delete mode 100644 libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/rma_memory_region.hpp delete mode 100644 libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/rma_memory_region_traits.hpp delete mode 100644 libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/rma_receiver.hpp delete mode 100644 libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/sender.hpp delete mode 100644 libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/unordered_map.hpp delete mode 100644 libs/full/parcelport_libfabric/src/parcelport_libfabric.cpp delete mode 100644 libs/full/parcelport_libfabric/src/receiver.cpp delete mode 100644 libs/full/parcelport_libfabric/src/rma_receiver.cpp delete mode 100644 libs/full/parcelport_libfabric/src/sender.cpp delete mode 100644 libs/full/parcelport_libfabric/tests/CMakeLists.txt delete mode 100644 libs/full/parcelport_libfabric/tests/performance/CMakeLists.txt delete mode 100644 libs/full/parcelport_libfabric/tests/regressions/CMakeLists.txt delete mode 100644 libs/full/parcelport_libfabric/tests/unit/CMakeLists.txt diff --git a/libs/full/CMakeLists.txt b/libs/full/CMakeLists.txt index e4a7eb24b923..8571c8a4dce4 100644 --- a/libs/full/CMakeLists.txt +++ b/libs/full/CMakeLists.txt @@ -33,7 +33,6 @@ set(_hpx_full_modules naming_base parcelport_gasnet parcelport_lci - parcelport_libfabric parcelport_mpi parcelport_tcp parcelports diff --git a/libs/full/modules.rst b/libs/full/modules.rst index 0f025215b8a5..3eafff8e37c2 100644 --- a/libs/full/modules.rst +++ b/libs/full/modules.rst @@ -35,7 +35,6 @@ Main |hpx| modules /libs/full/naming/docs/index.rst /libs/full/naming_base/docs/index.rst /libs/full/parcelport_lci/docs/index.rst - /libs/full/parcelport_libfabric/docs/index.rst /libs/full/parcelport_mpi/docs/index.rst /libs/full/parcelport_tcp/docs/index.rst /libs/full/parcelset/docs/index.rst diff --git a/libs/full/parcelport_libfabric/CMakeLists.txt b/libs/full/parcelport_libfabric/CMakeLists.txt deleted file mode 100644 index 25c8a8e9fbc5..000000000000 --- a/libs/full/parcelport_libfabric/CMakeLists.txt +++ /dev/null @@ -1,67 +0,0 @@ -# Copyright (c) 2019-2021 The STE||AR-Group -# -# SPDX-License-Identifier: BSL-1.0 -# Distributed under the Boost Software License, Version 1.0. (See accompanying -# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - -if(NOT (HPX_WITH_NETWORKING AND HPX_WITH_PARCELPORT_LIBFABRIC)) - return() -endif() - -list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake") - -# Configuring options -include(HPX_SetupLibfabric) - -set(libfabric_libraries Libfabric::libfabric) -if(HPX_PARCELPORT_LIBFABRIC_WITH_BOOTSTRAPPING) - set(libfabric_libraries ${libfabric_libraries} Pmi::pmi) -endif() - -set(parcelport_libfabric_headers - hpx/parcelport_libfabric/connection_handler.hpp - hpx/parcelport_libfabric/fabric_error.hpp - hpx/parcelport_libfabric/header.hpp - hpx/parcelport_libfabric/libfabric_controller.hpp - hpx/parcelport_libfabric/libfabric_region_provider.hpp - hpx/parcelport_libfabric/locality.hpp - hpx/parcelport_libfabric/parcelport_libfabric.hpp - hpx/parcelport_libfabric/parcelport_logging.hpp - hpx/parcelport_libfabric/performance_counter.hpp - hpx/parcelport_libfabric/pinned_memory_vector.hpp - hpx/parcelport_libfabric/rdma_locks.hpp - hpx/parcelport_libfabric/readers_writers_mutex.hpp - hpx/parcelport_libfabric/receiver.hpp - hpx/parcelport_libfabric/rma_base.hpp - hpx/parcelport_libfabric/rma_memory_pool.hpp - hpx/parcelport_libfabric/rma_memory_region.hpp - hpx/parcelport_libfabric/rma_memory_region_traits.hpp - hpx/parcelport_libfabric/rma_receiver.hpp - hpx/parcelport_libfabric/sender.hpp - hpx/parcelport_libfabric/unordered_map.hpp -) - -# cmake-format: off -set(parcelport_libfabric_compat_headers) -# cmake-format: on - -set(parcelport_libfabric_sources parcelport_libfabric.cpp receiver.cpp - rma_receiver.cpp sender.cpp -) - -include(HPX_AddModule) -add_hpx_module( - full parcelport_libfabric - GLOBAL_HEADER_GEN ON - SOURCES ${parcelport_libfabric_sources} - HEADERS ${parcelport_libfabric_headers} - COMPAT_HEADERS ${parcelport_libfabric_compat_headers} - DEPENDENCIES hpx_core ${libfabric_libraries} - MODULE_DEPENDENCIES hpx_actions hpx_command_line_handling hpx_parcelset - CMAKE_SUBDIRS examples tests -) - -set(HPX_STATIC_PARCELPORT_PLUGINS - ${HPX_STATIC_PARCELPORT_PLUGINS} parcelport_libfabric - CACHE INTERNAL "" FORCE -) diff --git a/libs/full/parcelport_libfabric/cmake/FindPMI.cmake b/libs/full/parcelport_libfabric/cmake/FindPMI.cmake deleted file mode 100644 index 3e3a45da1a15..000000000000 --- a/libs/full/parcelport_libfabric/cmake/FindPMI.cmake +++ /dev/null @@ -1,73 +0,0 @@ -# Copyright (c) 2017 Thomas Heller -# -# SPDX-License-Identifier: BSL-1.0 -# Distributed under the Boost Software License, Version 1.0. (See accompanying -# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - -# compatibility with older CMake versions -if(PMI_ROOT AND NOT Pmi_ROOT) - set(Pmi_ROOT - ${PMI_ROOT} - CACHE PATH "PMI base directory" - ) - unset(PMI_ROOT CACHE) -endif() - -find_package(PkgConfig QUIET) - -# look for cray pmi... -pkg_check_modules(PC_Pmi_CRAY QUIET cray-pmi) - -# look for the rest if we couldn't find the cray package -if(NOT PC_Pmi_CRAY_FOUND) - pkg_check_modules(PC_Pmi QUIET pmi) -endif() - -find_path( - Pmi_INCLUDE_DIR pmi2.h - HINTS ${Pmi_ROOT} - ENV - PMI_ROOT - ${Pmi_DIR} - ENV - PMI_DIR - ${PC_Pmi_CRAY_INCLUDEDIR} - ${PC_Pmi_CRAY_INCLUDE_DIRS} - ${PC_Pmi_INCLUDEDIR} - ${PC_Pmi_INCLUDE_DIRS} - PATH_SUFFIXES include -) - -find_library( - Pmi_LIBRARY - NAMES pmi - HINTS ${Pmi_ROOT} - ENV - PMI_ROOT - ${PC_Pmi_CRAY_LIBDIR} - ${PC_Pmi_CRAY_LIBRARY_DIRS} - ${PC_Pmi_LIBDIR} - ${PC_Pmi_LIBRARY_DIRS} - PATH_SUFFIXES lib lib64 -) - -# Set Pmi_ROOT in case the other hints are used -if(Pmi_ROOT) - # The call to file is for compatibility with windows paths - file(TO_CMAKE_PATH ${Pmi_ROOT} Pmi_ROOT) -elseif(DEFINED ENV{PMI_ROOT}) - file(TO_CMAKE_PATH $ENV{PMI_ROOT} Pmi_ROOT) -else() - file(TO_CMAKE_PATH "${Pmi_INCLUDE_DIR}" Pmi_INCLUDE_DIR) - string(REPLACE "/include" "" Pmi_ROOT "${Pmi_INCLUDE_DIR}") -endif() - -if(NOT Pmi_LIBRARY OR NOT Pmi_INCLUDE_DIR) - set(Pmi_FOUND=OFF) - return() -endif() - -include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(PMI DEFAULT_MSG Pmi_LIBRARY Pmi_INCLUDE_DIR) - -mark_as_advanced(Pmi_ROOT Pmi_LIBRARY Pmi_INCLUDE_DIR) diff --git a/libs/full/parcelport_libfabric/cmake/HPX_SetupLibfabric.cmake b/libs/full/parcelport_libfabric/cmake/HPX_SetupLibfabric.cmake deleted file mode 100644 index 9f140b76a81e..000000000000 --- a/libs/full/parcelport_libfabric/cmake/HPX_SetupLibfabric.cmake +++ /dev/null @@ -1,337 +0,0 @@ -# Copyright (c) 2019 Ste||ar Group -# -# SPDX-License-Identifier: BSL-1.0 -# Distributed under the Boost Software License, Version 1.0. (See accompanying -# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - -# FIXME : in the future put it directly inside the cmake directory of the -# corresponding plugin - -if(HPX_WITH_PARCELPORT_LIBFABRIC AND NOT TARGET Libfabric::libfabric) - # ------------------------------------------------------------------------------ - # Add #define to global defines.hpp - # ------------------------------------------------------------------------------ - hpx_add_config_define(HPX_HAVE_PARCELPORT_LIBFABRIC) - - # ------------------------------------------------------------------------------ - # OFIWG libfabric stack - # ------------------------------------------------------------------------------ - find_package(Libfabric REQUIRED) - - # Setup Libfabric imported target - add_library(Libfabric::libfabric INTERFACE IMPORTED) - target_include_directories( - Libfabric::libfabric SYSTEM INTERFACE ${LIBFABRIC_INCLUDE_DIR} - ) - target_link_libraries(Libfabric::libfabric INTERFACE ${LIBFABRIC_LIBRARY}) - - # Setup PMI imported target - find_package(PMI) - if(PMI_FOUND) - hpx_add_config_define_namespace( - DEFINE HPX_PARCELPORT_LIBFABRIC_HAVE_PMI NAMESPACE PARCELPORT_LIBFABRIC - ) - add_library(Pmi::pmi INTERFACE IMPORTED) - target_include_directories(Pmi::pmi SYSTEM INTERFACE ${PMI_INCLUDE_DIR}) - target_link_libraries(Pmi::pmi INTERFACE ${PMI_LIBRARY}) - endif() - - # ------------------------------------------------------------------------------ - # Logging - # ------------------------------------------------------------------------------ - hpx_option( - HPX_PARCELPORT_LIBFABRIC_WITH_LOGGING - BOOL - "Enable logging in the libfabric ParcelPort (default: OFF - Warning - severely impacts usability when enabled)" - OFF - CATEGORY "Parcelport" - ADVANCED - MODULE PARCELPORT_LIBFABRIC - ) - - if(HPX_PARCELPORT_LIBFABRIC_WITH_LOGGING) - hpx_add_config_define_namespace( - DEFINE HPX_PARCELPORT_LIBFABRIC_HAVE_LOGGING - NAMESPACE PARCELPORT_LIBFABRIC - ) - endif() - - # ------------------------------------------------------------------------------ - # Development mode (extra logging and customizable tweaks) - # ------------------------------------------------------------------------------ - hpx_option( - HPX_PARCELPORT_LIBFABRIC_WITH_DEV_MODE - BOOL - "Enables some extra logging and debug features in the libfabric parcelport (default: OFF - Warning - severely impacts usability when enabled)" - OFF - CATEGORY "Parcelport" - ADVANCED - MODULE PARCELPORT_LIBFABRIC - ) - - if(HPX_PARCELPORT_LIBFABRIC_WITH_DEV_MODE) - hpx_add_config_define_namespace( - DEFINE HPX_PARCELPORT_LIBFABRIC_HAVE_DEV_MODE - NAMESPACE PARCELPORT_LIBFABRIC - ) - endif() - - # ------------------------------------------------------------------------------ - # make sure boost log is linked correctly - # ------------------------------------------------------------------------------ - if(HPX_PARCELPORT_LIBFABRIC_WITH_LOGGING - OR HPX_PARCELPORT_LIBFABRIC_WITH_DEV_MODE - ) - if(NOT Boost_USE_STATIC_LIBS) - hpx_add_config_define_namespace( - DEFINE BOOST_LOG_DYN_LINK NAMESPACE PARCELPORT_LIBFABRIC - ) - endif() - endif() - - # ------------------------------------------------------------------------------ - # Hardware device selection - # ------------------------------------------------------------------------------ - hpx_option( - HPX_PARCELPORT_LIBFABRIC_PROVIDER STRING - "The provider (verbs/gni/psm2/sockets)" "verbs" - CATEGORY "Parcelport" - ADVANCED - MODULE PARCELPORT_LIBFABRIC - ) - - hpx_add_config_define_namespace( - DEFINE HPX_PARCELPORT_LIBFABRIC_PROVIDER - VALUE "\"${HPX_PARCELPORT_LIBFABRIC_PROVIDER}\"" - NAMESPACE PARCELPORT_LIBFABRIC - ) - - if(HPX_PARCELPORT_LIBFABRIC_PROVIDER MATCHES "verbs") - hpx_add_config_define_namespace( - DEFINE HPX_PARCELPORT_LIBFABRIC_VERBS NAMESPACE PARCELPORT_LIBFABRIC - ) - elseif(HPX_PARCELPORT_LIBFABRIC_PROVIDER MATCHES "gni") - hpx_add_config_define_namespace( - DEFINE HPX_PARCELPORT_LIBFABRIC_GNI NAMESPACE PARCELPORT_LIBFABRIC - ) - elseif(HPX_PARCELPORT_LIBFABRIC_PROVIDER MATCHES "sockets") - hpx_add_config_define_namespace( - DEFINE HPX_PARCELPORT_LIBFABRIC_SOCKETS NAMESPACE PARCELPORT_LIBFABRIC - ) - elseif(HPX_PARCELPORT_LIBFABRIC_PROVIDER MATCHES "psm2") - hpx_add_config_define_namespace( - DEFINE HPX_PARCELPORT_LIBFABRIC_PSM2 NAMESPACE PARCELPORT_LIBFABRIC - ) - endif() - - hpx_option( - HPX_PARCELPORT_LIBFABRIC_DOMAIN STRING - "The libfabric domain (leave blank for default" "" - CATEGORY "Parcelport" - ADVANCED - MODULE PARCELPORT_LIBFABRIC - ) - - hpx_add_config_define_namespace( - DEFINE HPX_PARCELPORT_LIBFABRIC_DOMAIN - VALUE "\"${HPX_PARCELPORT_LIBFABRIC_DOMAIN}\"" - NAMESPACE PARCELPORT_LIBFABRIC - ) - - hpx_option( - HPX_PARCELPORT_LIBFABRIC_ENDPOINT STRING - "The libfabric endpoint type (leave blank for default" "rdm" - CATEGORY "Parcelport" - ADVANCED - MODULE PARCELPORT_LIBFABRIC - ) - - hpx_add_config_define_namespace( - DEFINE HPX_PARCELPORT_LIBFABRIC_ENDPOINT - VALUE "\"${HPX_PARCELPORT_LIBFABRIC_ENDPOINT}\"" - NAMESPACE PARCELPORT_LIBFABRIC - ) - - # ------------------------------------------------------------------------------ - # Bootstrap options - # ------------------------------------------------------------------------------ - hpx_option( - HPX_PARCELPORT_LIBFABRIC_WITH_BOOTSTRAPPING - BOOL - "Configure the parcelport to enable bootstrap capabilities (default: OFF, enabled if PMI was found)" - ${PMI_FOUND} - CATEGORY "Parcelport" - ADVANCED - MODULE PARCELPORT_LIBFABRIC - ) - - if(HPX_PARCELPORT_LIBFABRIC_WITH_BOOTSTRAPPING) - if(NOT PMI_FOUND) - hpx_error( - "For libfabric bootstrapping to work, the PMI libraries must be enabled and found" - ) - endif() - - hpx_add_config_define_namespace( - DEFINE HPX_PARCELPORT_LIBFABRIC_HAVE_BOOTSTRAPPING - VALUE std::true_type - NAMESPACE PARCELPORT_LIBFABRIC - ) - else() - hpx_add_config_define_namespace( - DEFINE HPX_PARCELPORT_LIBFABRIC_HAVE_BOOTSTRAPPING - VALUE std::false_type - NAMESPACE PARCELPORT_LIBFABRIC - ) - endif() - - # ------------------------------------------------------------------------------ - # Performance counters - # ------------------------------------------------------------------------------ - hpx_option( - HPX_PARCELPORT_LIBFABRIC_WITH_PERFORMANCE_COUNTERS BOOL - "Enable libfabric parcelport performance counters (default: OFF)" OFF - CATEGORY "Parcelport" - ADVANCED - MODULE PARCELPORT_LIBFABRIC - ) - - if(HPX_PARCELPORT_LIBFABRIC_WITH_PERFORMANCE_COUNTERS) - hpx_add_config_define_namespace( - DEFINE HPX_PARCELPORT_LIBFABRIC_HAVE_PERFORMANCE_COUNTERS - NAMESPACE PARCELPORT_LIBFABRIC - ) - endif() - - # ------------------------------------------------------------------------------ - # Throttling options - # ------------------------------------------------------------------------------ - hpx_option( - HPX_PARCELPORT_LIBFABRIC_THROTTLE_SENDS STRING - "Threshold of active sends at which throttling is enabled (default: 16)" - "16" - CATEGORY "Parcelport" - ADVANCED - MODULE PARCELPORT_LIBFABRIC - ) - - hpx_add_config_define_namespace( - DEFINE HPX_PARCELPORT_LIBFABRIC_THROTTLE_SENDS - VALUE ${HPX_PARCELPORT_LIBFABRIC_THROTTLE_SENDS} - NAMESPACE PARCELPORT_LIBFABRIC - ) - - # ------------------------------------------------------------------------------ - # Custom Scheduler options - # ------------------------------------------------------------------------------ - hpx_option( - HPX_PARCELPORT_LIBFABRIC_USE_CUSTOM_SCHEDULER - BOOL - "Configure the parcelport to use a custom scheduler (default: OFF - Warning, experimental, may cause serious program errors)" - OFF - CATEGORY "Parcelport" - ADVANCED - MODULE PARCELPORT_LIBFABRIC - ) - - if(HPX_PARCELPORT_LIBFABRIC_USE_CUSTOM_SCHEDULER) - hpx_add_config_define_namespace( - DEFINE HPX_PARCELPORT_LIBFABRIC_USE_CUSTOM_SCHEDULER - NAMESPACE PARCELPORT_LIBFABRIC - ) - endif() - - # ------------------------------------------------------------------------------ - # Lock checking - # ------------------------------------------------------------------------------ - hpx_option( - HPX_PARCELPORT_LIBFABRIC_DEBUG_LOCKS - BOOL - "Turn on extra log messages for lock/unlock (default: OFF - Warning, severely impacts performance when enabled)" - OFF - CATEGORY "Parcelport" - ADVANCED - MODULE PARCELPORT_LIBFABRIC - ) - - if(HPX_PARCELPORT_LIBFABRIC_DEBUG_LOCKS) - hpx_add_config_define_namespace( - DEFINE HPX_PARCELPORT_LIBFABRIC_DEBUG_LOCKS - NAMESPACE PARCELPORT_LIBFABRIC - ) - endif() - - # ------------------------------------------------------------------------------ - # Memory chunk/reservation options - # ------------------------------------------------------------------------------ - hpx_option( - HPX_PARCELPORT_LIBFABRIC_MEMORY_CHUNK_SIZE STRING - "Number of bytes a default chunk in the memory pool can hold (default: 4K)" - "4096" - CATEGORY "Parcelport" - ADVANCED - MODULE PARCELPORT_LIBFABRIC - ) - - hpx_option( - HPX_PARCELPORT_LIBFABRIC_64K_PAGES STRING - "Number of 64K pages we reserve for default message buffers (default: 10)" - "10" - CATEGORY "Parcelport" - ADVANCED - MODULE PARCELPORT_LIBFABRIC - ) - - hpx_option( - HPX_PARCELPORT_LIBFABRIC_MEMORY_COPY_THRESHOLD - STRING - "Cutoff size over which data is never copied into existing buffers (default: 4K)" - "4096" - CATEGORY "Parcelport" - ADVANCED - MODULE PARCELPORT_LIBFABRIC - ) - - hpx_add_config_define_namespace( - DEFINE HPX_PARCELPORT_LIBFABRIC_MEMORY_CHUNK_SIZE - VALUE ${HPX_PARCELPORT_LIBFABRIC_MEMORY_CHUNK_SIZE} - NAMESPACE PARCELPORT_LIBFABRIC - ) - - # define the message header size to be equal to the chunk size - hpx_add_config_define_namespace( - DEFINE HPX_PARCELPORT_LIBFABRIC_MESSAGE_HEADER_SIZE - VALUE ${HPX_PARCELPORT_LIBFABRIC_MEMORY_CHUNK_SIZE} - NAMESPACE PARCELPORT_LIBFABRIC - ) - - hpx_add_config_define_namespace( - DEFINE HPX_PARCELPORT_LIBFABRIC_64K_PAGES - VALUE ${HPX_PARCELPORT_LIBFABRIC_64K_PAGES} - NAMESPACE PARCELPORT_LIBFABRIC - ) - - hpx_add_config_define_namespace( - DEFINE HPX_PARCELPORT_LIBFABRIC_MEMORY_COPY_THRESHOLD - VALUE ${HPX_PARCELPORT_LIBFABRIC_MEMORY_COPY_THRESHOLD} - NAMESPACE PARCELPORT_LIBFABRIC - ) - - # ------------------------------------------------------------------------------ - # Preposting options - # ------------------------------------------------------------------------------ - hpx_option( - HPX_PARCELPORT_LIBFABRIC_MAX_PREPOSTS STRING - "The number of pre-posted receive buffers (default: 512)" "512" - CATEGORY "Parcelport" - ADVANCED - MODULE PARCELPORT_LIBFABRIC - ) - - hpx_add_config_define_namespace( - DEFINE HPX_PARCELPORT_LIBFABRIC_MAX_PREPOSTS - VALUE ${HPX_PARCELPORT_LIBFABRIC_MAX_PREPOSTS} - NAMESPACE PARCELPORT_LIBFABRIC - ) - -endif() diff --git a/libs/full/parcelport_libfabric/docs/index.rst b/libs/full/parcelport_libfabric/docs/index.rst deleted file mode 100644 index 12108a802ba0..000000000000 --- a/libs/full/parcelport_libfabric/docs/index.rst +++ /dev/null @@ -1,18 +0,0 @@ -.. - Copyright (c) 2020-2021 The STE||AR-Group - - SPDX-License-Identifier: BSL-1.0 - Distributed under the Boost Software License, Version 1.0. (See accompanying - file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - -.. _modules_parcelport_libfabric: - -==================== -parcelport_libfabric -==================== - -TODO: High-level description of the module. - -See the :ref:`API reference ` of this module for more -details. - diff --git a/libs/full/parcelport_libfabric/examples/CMakeLists.txt b/libs/full/parcelport_libfabric/examples/CMakeLists.txt deleted file mode 100644 index 48ab23bb61c0..000000000000 --- a/libs/full/parcelport_libfabric/examples/CMakeLists.txt +++ /dev/null @@ -1,18 +0,0 @@ -# Copyright (c) 2020-2021 The STE||AR-Group -# -# SPDX-License-Identifier: BSL-1.0 -# Distributed under the Boost Software License, Version 1.0. (See accompanying -# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - -if(HPX_WITH_EXAMPLES) - add_hpx_pseudo_target(examples.modules.parcelport_libfabric) - add_hpx_pseudo_dependencies( - examples.modules examples.modules.parcelport_libfabric - ) - if(HPX_WITH_TESTS AND HPX_WITH_TESTS_EXAMPLES) - add_hpx_pseudo_target(tests.examples.modules.parcelport_libfabric) - add_hpx_pseudo_dependencies( - tests.examples.modules tests.examples.modules.parcelport_libfabric - ) - endif() -endif() diff --git a/libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/connection_handler.hpp b/libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/connection_handler.hpp deleted file mode 100644 index 530343cc77a7..000000000000 --- a/libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/connection_handler.hpp +++ /dev/null @@ -1,94 +0,0 @@ -// Copyright (c) 2015 John Biddiscombe -// -// SPDX-License-Identifier: BSL-1.0 -// Distributed under the Boost Software License, Version 1.0. (See accompanying -// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - -#pragma once - -#include -#include -#include -#include -#include -#include - -#include -#include - -#include - -namespace hpx::parcelset { - - namespace policies::libfabric { - struct HPX_EXPORT parcelport; - } // namespace policies::libfabric - - using namespace hpx::parcelset::policies::libfabric; - - template <> - struct connection_handler_traits - { - using connection_type = policies::libfabric::sender; - using send_early_parcel = HPX_PARCELPORT_LIBFABRIC_HAVE_BOOTSTRAPPING; - using do_background_work = std::true_type; - using send_immediate_parcels = std::true_type; - using is_connectionless = std::false_type; - - static constexpr const char* type() noexcept - { - return "libfabric"; - } - - static constexpr const char* pool_name() noexcept - { - return "parcel-pool-libfabric"; - } - - static constexpr const char* pool_name_postfix() noexcept - { - return "-libfabric"; - } - }; - /* -namespace policies { -namespace libfabric -{ - uint32_t get_verbs_device_address(const char *devicename, const char *iface, - char *hostname) - { - FUNC_START_DEBUG_MSG - // Find the address of the I/O link device. - verbs_device_ptr linkDevice; - try { - linkDevice = verbs_device_ptr(new verbs_device(devicename, iface)); - } - catch (fabric_error& e) { - LOG_ERROR_MSG("error opening InfiniBand device: " << e.what()); - } - LOG_DEBUG_MSG("Created InfiniBand device for " - << linkDevice->get_device_name() << " using interface " - << linkDevice->get_interface_name()); - - std::stringstream temp; - in_addr_t addr = linkDevice->get_address(); - temp - << (int)((uint8_t*)&addr)[0] << "." - << (int)((uint8_t*)&addr)[1] << "." - << (int)((uint8_t*)&addr)[2] << "." - << (int)((uint8_t*)&addr)[3] << std::ends; - strcpy(hostname, temp.str().c_str()); - // - LOG_DEBUG_MSG("Generated hostname string " << hostname); - - // print device info for debugging - // linkDevice->getDeviceInfo(true); - FUNC_END_DEBUG_MSG - return (uint32_t)(addr); - } - -}} -*/ -} // namespace hpx::parcelset - -#include diff --git a/libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/fabric_error.hpp b/libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/fabric_error.hpp deleted file mode 100644 index 8f493a3aa4c5..000000000000 --- a/libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/fabric_error.hpp +++ /dev/null @@ -1,61 +0,0 @@ -// Copyright (C) 2016 John Biddiscombe -// -// SPDX-License-Identifier: BSL-1.0 -// Distributed under the Boost Software License, Version 1.0. (See -// accompanying file LICENSE_1_0.txt or copy at - -#pragma once - -#include -#include -// -#include -#include -#include -#include -// -#include -// -namespace hpx { namespace parcelset { namespace policies { namespace libfabric { - - class fabric_error : public std::runtime_error - { - public: - // -------------------------------------------------------------------- - fabric_error(int err, const std::string& msg) - : std::runtime_error(std::string(fi_strerror(-err)) + msg) - , error_(err) - { - LOG_ERROR_MSG(msg << " : " << fi_strerror(-err)); - std::terminate(); - } - - fabric_error(int err) - : std::runtime_error(fi_strerror(-err)) - , error_(-err) - { - LOG_ERROR_MSG(what()); - std::terminate(); - } - - // -------------------------------------------------------------------- - int error_code() const - { - return error_; - } - - // -------------------------------------------------------------------- - static inline char* error_string(int err) - { - char buffer[256]; -#if (_POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600) && !_GNU_SOURCE - return strerror_r(err, buffer, sizeof(buf)) ? nullptr : buffer; -#else - return strerror_r(err, buffer, 256); -#endif - } - - int error_; - }; - -}}}} // namespace hpx::parcelset::policies::libfabric diff --git a/libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/header.hpp b/libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/header.hpp deleted file mode 100644 index 8eb0e836b208..000000000000 --- a/libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/header.hpp +++ /dev/null @@ -1,440 +0,0 @@ -// Copyright (c) 2015-2016 John Biddiscombe -// Copyright (c) 2013-2015 Thomas Heller -// Copyright (c) 2013-2014 Hartmut Kaiser -// -// SPDX-License-Identifier: BSL-1.0 -// Distributed under the Boost Software License, Version 1.0. (See accompanying -// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - -#pragma once - -#include - -#include -#include -#include -// -#include -#include -#include -#include -#include -#include - -// A generic header structure that can be used by parcelports -// currently, the libfabric parcelport makes use of it -namespace hpx { namespace parcelset { namespace policies { namespace libfabric { - namespace detail { - typedef serialization::serialization_chunk chunktype; - - // if chunks are not piggybacked, we must send an rma handle for chunk access - // and state how many other rma chunks need to be retrieved (since this is - // normally stored in the missing chunk info) - struct chunk_header - { - uint32_t num_rma_chunks; - chunktype chunk_rma; - }; - - // data we send if there are zero copy blocks (or non piggybacked header/chunks) - struct rma_info - { - uint64_t tag; - }; - - // data we send if message is piggybacked - struct message_info - { - uint64_t message_size; - }; - - // data we send if both message and chunk data are -not- piggybacked - // to store the rma information for the message (that otherwise would be in - // the chunk data) - struct message_chunk - { - chunktype message_rma; - }; - - // this header block is always sent - struct header_block - { - uint32_t num_chunks; - uint32_t - flags; // for padding to nice boundary (only need a few bits) - }; - } // namespace detail - - template - struct header - { - static constexpr unsigned int header_block_size = - sizeof(detail::header_block); - static constexpr unsigned int data_size_ = SIZE - header_block_size; - // - static const unsigned int chunk_flag = 0x01; // chunks piggybacked - static const unsigned int message_flag = 0x02; // message pigybacked - static const unsigned int normal_flag = - 0x04; // normal chunks present - static const unsigned int zerocopy_flag = - 0x08; // zerocopy chunks present - - typedef serialization::serialization_chunk chunktype; - - private: - // - // this is the actual header content - // - detail::header_block message_header; - std::array data_; - // the data block is laid out as follows for each optional item - // message_header - always present header_block_size - // chunk data : sizeof(chunktype) * numchunks : when chunks piggybacked - // or : sizeof(chunk_header) : when chunks not piggybacked - // rma_info : sizeof(rma_info) : when we have anything to be rma'd - // message_info : sizeof(message_info) : only when message pigybacked - // or : sizeof(message_chunk) : when message+chunk both not piggybacked - // ..... - // message : buffer.size_ : only when message piggybacked - - public: - // - template - header(Buffer const& buffer, void* tag) - { - const std::vector& chunks = buffer.chunks_; - // - message_header.flags = 0; - message_header.num_chunks = chunks.size(); - message_header.flags |= - buffer.num_chunks_.first ? zerocopy_flag : 0; - message_header.flags |= buffer.num_chunks_.second ? normal_flag : 0; - - // space occupied by chunk data - size_t chunkbytes = chunks.size() * sizeof(chunktype); - - // can we send the chunk info inside the header - // (NB. we add +1 chunk just in case of a non piggybacked message chunk) - if ((chunkbytes + sizeof(chunktype)) <= data_size_) - { - message_header.flags |= chunk_flag; - // copy chunk data directly into the header - std::memcpy( - &data_[chunk_data_offset()], chunks.data(), chunkbytes); - } - else - { - LOG_DEBUG_MSG("Too many chunks for header " - << decnumber(chunks.size()) << "requires bytes " - << decnumber(chunkbytes)); - message_header.flags &= ~chunk_flag; - message_header.flags |= zerocopy_flag; - // send just rma-get information, address and rma key will be added later - detail::chunk_header* ch = - reinterpret_cast( - &data_[chunk_data_offset()]); - ch->num_rma_chunks = buffer.num_chunks_.first; - ch->chunk_rma = - serialization::create_pointer_chunk(nullptr, chunkbytes, 0); - // reset chunkbytes size to size of rma hunk header - chunkbytes = sizeof(detail::chunk_header); - } - - // can we send main message inside the header - if (buffer.data_.size() <= (data_size_ - chunkbytes)) - { - message_header.flags |= message_flag; - detail::message_info* info = message_info_ptr(); - info->message_size = buffer.size_; - } - else - { - message_header.flags &= ~message_flag; - message_header.flags |= zerocopy_flag; - if ((message_header.flags & chunk_flag) != 0) - { - // if chunks are piggybacked, just add one rma chunk for the message - message_header.num_chunks += 1; - chunktype message = serialization::create_pointer_chunk( - nullptr, buffer.size_, 0); - std::memcpy( - &data_[chunkbytes], &message, sizeof(chunktype)); - } - else - { - // the message isn't piggybacked and neither is the chunk data - // so we must add rma-get information for the message - detail::message_chunk* mc = - reinterpret_cast( - &data_[message_info_offset()]); - LOG_DEBUG_MSG("Setting chunk free message size to " - << decnumber(buffer.size_) << "offset " - << decnumber(message_info_offset())); - mc->message_rma = serialization::create_pointer_chunk( - nullptr, buffer.size_, 0); - } - } - - // set the rma tag - if ((message_header.flags & zerocopy_flag) != 0) - { - auto ptr = rma_info_ptr(); - ptr->tag = reinterpret_cast(tag); - } - - LOG_DEBUG_MSG("Header : " << *this); - } - - // -------------------------------------------------------------------- - friend std::ostream& operator<<(std::ostream& os, header& h) - { - os << "Flags " << hexbyte(h.message_header.flags) - << "chunk_data_offset " << decnumber(h.chunk_data_offset()) - << "rma_info_offset " << decnumber(h.rma_info_offset()) - << "message_info_offset " << decnumber(h.message_info_offset()) - << "message_offset " << decnumber(h.message_offset()) - << "header length " << decnumber(h.header_length()) - << "message length " << hexlength(h.message_size()) << "chunks " - << decnumber(h.num_chunks()) << "zerocopy ( " - << decnumber(h.num_zero_copy_chunks()) << ") " - << "normal ( " << decnumber(h.num_index_chunks()) << ") " - << "piggyback " << decnumber((h.message_piggy_back())) << "tag " - << hexuint64(h.tag()); - return os; - } - - public: - // ------------------------------------------------------------------ - // if chunks are piggybacked, return pointer to list of chunk data - inline char* chunk_ptr() - { - if ((message_header.flags & chunk_flag) == 0) - { - return nullptr; - } - return reinterpret_cast(&data_[chunk_data_offset()]); - } - - // ------------------------------------------------------------------ - // if chunks are not piggybacked, return pointer to chunk rma info - inline detail::chunk_header* chunk_header_ptr() - { - if ((message_header.flags & chunk_flag) == 0) - { - return reinterpret_cast( - &data_[chunk_data_offset()]); - } - return nullptr; - } - - // ------------------------------------------------------------------ - // if there are rma blocks, return pointer to the rma tag - inline detail::rma_info* rma_info_ptr() - { - if ((message_header.flags & zerocopy_flag) == 0) - { - return nullptr; - } - return reinterpret_cast( - &data_[rma_info_offset()]); - } - - // ------------------------------------------------------------------ - // if message is piggybacked, return pointer to start of message block - inline detail::message_info* message_info_ptr() - { - if ((message_header.flags & message_flag) == 0) - { - return nullptr; - } - return reinterpret_cast( - &data_[message_info_offset()]); - } - - // ------------------------------------------------------------------ - // if message+chunk are not piggybacked, return pointer to message chunk - inline detail::message_chunk* message_chunk_ptr() - { - if ((message_header.flags & message_flag) == 0 && - (message_header.flags & chunk_flag) == 0) - { - return reinterpret_cast( - &data_[message_info_offset()]); - } - return nullptr; - } - - // ------------------------------------------------------------------ - inline char* message_ptr() - { - if ((message_header.flags & message_flag) == 0) - { - return nullptr; - } - return reinterpret_cast(&data_[message_offset()]); - } - - // ------------------------------------------------------------------ - inline uint32_t chunk_data_offset() const - { - // just in case we ever add any new stuff - return 0; - } - - inline uint32_t rma_info_offset() const - { - // add the chunk data offset - std::uint32_t size = chunk_data_offset(); - if ((message_header.flags & chunk_flag) != 0) - { - size = (message_header.num_chunks * sizeof(chunktype)); - } - else - { - // chunks are not piggybacked, insert rma details - size = sizeof(detail::chunk_header); - } - return size; - } - - inline uint32_t message_info_offset() const - { - // add the rma info offset - std::uint32_t size = rma_info_offset(); - if ((message_header.flags & zerocopy_flag) != 0) - { - size += sizeof(detail::rma_info); - } - return size; - } - - inline uint32_t message_offset() const - { - // add the message info offset - std::uint32_t size = message_info_offset(); - if ((message_header.flags & message_flag) != 0) - { - size += sizeof(detail::message_info); - } - else if ((message_header.flags & message_flag) == 0 && - (message_header.flags & chunk_flag) == 0) - { - size += sizeof(detail::message_chunk); - } - return size; - } - - // ------------------------------------------------------------------ - // here beginneth the main public API - // ------------------------------------------------------------------ - inline char* chunk_data() - { - return chunk_ptr(); - } - - inline char* message_data() - { - return message_ptr(); - } - - inline bool message_piggy_back() - { - return message_ptr() != nullptr; - } - - inline uint64_t tag() - { - auto ptr = rma_info_ptr(); - return ptr ? ptr->tag : 0; - } - - inline uint32_t message_size() - { - auto ptr = message_info_ptr(); - if (ptr) - { - return ptr->message_size; - } - // if the data is not piggybacked then look at the final chunk - chunktype* chunks = reinterpret_cast(chunk_ptr()); - if (!chunks) - { - detail::message_chunk* mc = message_chunk_ptr(); - LOG_DEBUG_MSG("chunk free message size is " - << decnumber(mc->message_rma.size_) << "offset was " - << decnumber(message_info_offset())); - return mc->message_rma.size_; - } - return chunks[message_header.num_chunks - 1].size_; - } - - // the full size of all the header information - inline std::uint32_t header_length() - { - std::uint32_t size = header_block_size + message_offset(); - return size; - } - - inline void set_message_rdma_info(uint64_t key, const void* addr) - { - chunktype* chunks = reinterpret_cast(chunk_ptr()); - if (!chunks) - { - detail::message_chunk* mc = message_chunk_ptr(); - chunks = &mc->message_rma; - } - else - { - chunks = &chunks[message_header.num_chunks - 1]; - } - // the last chunk will be our RMA message chunk - chunks->rkey_ = key; - chunks->data_.cpos_ = addr; - } - - std::uint32_t num_chunks() - { - return message_header.num_chunks; - } - - std::uint32_t num_zero_copy_chunks() - { - chunktype* chunks = reinterpret_cast(chunk_ptr()); - if (!chunks) - { - throw std::runtime_error( - "num_zero_copy_chunks without chunk data"); - } - uint32_t num = 0; - for (uint32_t i = 0; i < message_header.num_chunks; ++i) - { - if (chunks[i].type_ == - serialization::chunk_type::chunk_type_pointer) - { - ++num; - } - } - return num; - } - - std::uint32_t num_index_chunks() - { - chunktype* chunks = reinterpret_cast(chunk_ptr()); - if (!chunks) - { - throw std::runtime_error("num_index_chunks without chunk data"); - } - uint32_t num = 0; - for (uint32_t i = 0; i < message_header.num_chunks; ++i) - { - if (chunks[i].type_ == - serialization::chunk_type::chunk_type_index) - { - ++num; - } - } - return num; - } - }; - -}}}} // namespace hpx::parcelset::policies::libfabric diff --git a/libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/libfabric_controller.hpp b/libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/libfabric_controller.hpp deleted file mode 100644 index c52943ca28c2..000000000000 --- a/libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/libfabric_controller.hpp +++ /dev/null @@ -1,1342 +0,0 @@ -// Copyright (c) 2016 John Biddiscombe -// Copyright (c) 2017 Thomas Heller -// -// SPDX-License-Identifier: BSL-1.0 -// Distributed under the Boost Software License, Version 1.0. (See accompanying -// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - -#pragma once - -// config -#include - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -// -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -// -#include -#include -#include -#include -#include -#include -#include - -#if defined(HPX_PARCELPORT_LIBFABRIC_GNI) || \ - defined(HPX_PARCELPORT_LIBFABRIC_SOCKETS) || \ - defined(HPX_PARCELPORT_LIBFABRIC_PSM2) -#define HPX_PARCELPORT_LIBFABRIC_ENDPOINT_RDM -#else -#define HPX_PARCELPORT_LIBFABRIC_ENDPOINT_MSG -#endif - -#ifdef HPX_PARCELPORT_LIBFABRIC_GNI -#include "rdma/fi_ext_gni.h" -#endif - -#ifdef HPX_PARCELPORT_LIBFABRIC_HAVE_PMI -// -#include -// -#include -#include -#include - -using namespace boost::archive::iterators; - -typedef base64_from_binary> - base64_t; - -typedef transform_width, 8, 6> - binary_t; -#endif - -namespace hpx { namespace parcelset { namespace policies { namespace libfabric { - - class libfabric_controller - { - public: - typedef hpx::spinlock mutex_type; - typedef hpx::parcelset::policies::libfabric::unique_lock - unique_lock; - typedef hpx::parcelset::policies::libfabric::scoped_lock - scoped_lock; - - // NOTE: Connection maps are not used for endpoint type RDM - // when a new connection is requested, it will be completed asynchronously - // we need a promise/future for each endpoint so that we can set the new - // endpoint when the connection completes and is ready - // Note - only used during connection, then deleted - typedef std::tuple, hpx::shared_future> - promise_tuple_type; - - // lock types for maps - typedef hpx::concurrent::unordered_map::map_read_lock_type map_read_lock_type; - typedef hpx::concurrent::unordered_map::map_write_lock_type map_write_lock_type; - - // Map of connections started, needed until connection is completed - hpx::concurrent::unordered_map - endpoint_tmp_; - std::unordered_map endpoint_av_; - - locality here_; - locality agas_; - - struct fi_info* fabric_info_; - struct fid_fabric* fabric_; - struct fid_domain* fabric_domain_; - // Server/Listener for RDMA connections. - struct fid_pep* ep_passive_; - struct fid_ep* ep_active_; - struct fid_ep* ep_shared_rx_cxt_; - - // we will use just one event queue for all connections - struct fid_eq* event_queue_; - struct fid_cq *txcq_, *rxcq_; - struct fid_av* av_; - - bool immediate_; - - // -------------------------------------------------------------------- - // constructor gets info from device and sets up all necessary - // maps, queues and server endpoint etc - libfabric_controller(std::string const& provider, - std::string const& domain, std::string const& endpoint, - int /*port*/ = 7910) - : fabric_info_(nullptr) - , fabric_(nullptr) - , fabric_domain_(nullptr) - , ep_passive_(nullptr) - , ep_active_(nullptr) - , ep_shared_rx_cxt_(nullptr) - , event_queue_(nullptr) - // - , txcq_(nullptr) - , rxcq_(nullptr) - , av_(nullptr) - // - , immediate_(false) - , event_pause_(0) - { - FUNC_START_DEBUG_MSG; - open_fabric(provider, domain, endpoint); - - // Create a memory pool for pinned buffers - memory_pool_.reset( - new rma_memory_pool(fabric_domain_)); - - // setup a passive listener, or an active RDM endpoint - here_ = create_local_endpoint(); -#if !defined(HPX_PARCELPORT_LIBFABRIC_ENDPOINT_RDM) - create_event_queue(); -#endif - - LOG_DEBUG_MSG("Calling boot PMI"); - boot_PMI(); - FUNC_END_DEBUG_MSG; - } - - void boot_PMI() - { -#ifdef HPX_PARCELPORT_LIBFABRIC_HAVE_PMI - int spawned; - int size; - int rank; - int appnum; - - LOG_DEBUG_MSG("Calling PMI init"); - PMI2_Init(&spawned, &size, &rank, &appnum); - LOG_DEBUG_MSG("Called PMI init on rank" << decnumber(rank)); - - // create address vector and queues we need if bootstrapping - create_completion_queues(fabric_info_, size); - - // we must pass out libfabric data to other nodes - // encode it as a string to put into the PMI KV store - std::string encoded_locality( - base64_t((const char*) (here_.fabric_data())), - base64_t((const char*) (here_.fabric_data()) + - locality::array_size)); - int encoded_length = encoded_locality.size(); - LOG_DEBUG_MSG("Encoded locality as " << encoded_locality - << " with length " - << decnumber(encoded_length)); - - // Key name for PMI - std::string pmi_key = "hpx_libfabric_" + std::to_string(rank); - // insert out data in the KV store - LOG_DEBUG_MSG("Calling PMI2_KVS_Put on rank " << decnumber(rank)); - PMI2_KVS_Put(pmi_key.data(), encoded_locality.data()); - - // Wait for all to do the same - LOG_DEBUG_MSG("Calling PMI2_KVS_Fence on rank " << decnumber(rank)); - PMI2_KVS_Fence(); - - // read libfabric data for all nodes and insert into our Address vector - for (int i = 0; i < size; ++i) - { - // read one locality key - std::string pmi_key = "hpx_libfabric_" + std::to_string(i); - char encoded_data[locality::array_size * 2]; - int length = 0; - PMI2_KVS_Get(0, i, pmi_key.data(), encoded_data, - encoded_length + 1, &length); - if (length != encoded_length) - { - LOG_ERROR_MSG("PMI value length mismatch, expected " - << decnumber(encoded_length) << "got " - << decnumber(length)); - } - - // decode the string back to raw locality data - LOG_DEBUG_MSG("Calling decode for " << decnumber(i) - << " locality data on rank " - << decnumber(rank)); - locality new_locality; - std::copy(binary_t(encoded_data), - binary_t(encoded_data + encoded_length), - (new_locality.fabric_data_writable())); - - // insert locality into address vector - LOG_DEBUG_MSG("Calling insert_address for " - << decnumber(i) << "on rank " << decnumber(rank)); - insert_address(new_locality); - LOG_DEBUG_MSG( - "rank " << decnumber(i) << "added to address vector"); - if (i == 0) - { - agas_ = new_locality; - } - } - - PMI2_Finalize(); -#endif - } - - // -------------------------------------------------------------------- - // clean up all resources - ~libfabric_controller() - { - unsigned int messages_handled = 0; - unsigned int acks_received = 0; - unsigned int msg_plain = 0; - unsigned int msg_rma = 0; - unsigned int sent_ack = 0; - unsigned int rma_reads = 0; - unsigned int recv_deletes = 0; - // - for (auto& r : receivers_) - { - r.cleanup(); - // from receiver - messages_handled += r.messages_handled_; - acks_received += r.acks_received_; - // from rma_receivers - msg_plain += r.msg_plain_; - msg_rma += r.msg_rma_; - sent_ack += r.sent_ack_; - rma_reads += r.rma_reads_; - recv_deletes += r.recv_deletes_; - } - - LOG_DEBUG_MSG("Received messages " - << decnumber(messages_handled) << "Received acks " - << decnumber(acks_received) << "Sent acks " - << decnumber(sent_ack) << "Total reads " << decnumber(rma_reads) - << "Total deletes " << decnumber(recv_deletes) - << "deletes error " - << decnumber(messages_handled - recv_deletes)); - - // Cleaning up receivers to avoid memory leak errors. - receivers_.clear(); - - LOG_DEBUG_MSG("closing fabric_->fid"); - if (fabric_) - fi_close(&fabric_->fid); -#ifdef HPX_PARCELPORT_LIBFABRIC_ENDPOINT_RDM - LOG_DEBUG_MSG("closing ep_active_->fid"); - if (ep_active_) - fi_close(&ep_active_->fid); -#else - LOG_DEBUG_MSG("closing ep_passive_->fid"); - if (ep_passive_) - fi_close(&ep_passive_->fid); -#endif - LOG_DEBUG_MSG("closing event_queue_->fid"); - if (event_queue_) - fi_close(&event_queue_->fid); - LOG_DEBUG_MSG("closing fabric_domain_->fid"); - if (fabric_domain_) - fi_close(&fabric_domain_->fid); - LOG_DEBUG_MSG("closing ep_shared_rx_cxt_->fid"); - if (ep_shared_rx_cxt_) - fi_close(&ep_shared_rx_cxt_->fid); - // clean up - LOG_DEBUG_MSG("freeing fabric_info"); - fi_freeinfo(fabric_info_); - } - - // -------------------------------------------------------------------- - // initialize the basic fabric/domain/name - void open_fabric(std::string const& provider, std::string const& domain, - std::string const& endpoint_type) - { - FUNC_START_DEBUG_MSG; - struct fi_info* fabric_hints_ = fi_allocinfo(); - if (!fabric_hints_) - { - throw fabric_error(-1, "Failed to allocate fabric hints"); - } - // we require message and RMA support, so ask for them - // we also want receives to carry source address info - fabric_hints_->caps = FI_MSG | FI_RMA | FI_SOURCE | FI_WRITE | - FI_READ | FI_REMOTE_READ | FI_REMOTE_WRITE | FI_RMA_EVENT; - fabric_hints_->mode = FI_CONTEXT | FI_LOCAL_MR; - fabric_hints_->fabric_attr->prov_name = strdup(provider.c_str()); - LOG_DEBUG_MSG( - "fabric provider " << fabric_hints_->fabric_attr->prov_name); - if (domain.size() > 0) - { - fabric_hints_->domain_attr->name = strdup(domain.c_str()); - LOG_DEBUG_MSG( - "fabric domain " << fabric_hints_->domain_attr->name); - } - - // use infiniband type basic registration for now - fabric_hints_->domain_attr->mr_mode = FI_MR_BASIC; - - // Disable the use of progress threads - fabric_hints_->domain_attr->control_progress = FI_PROGRESS_MANUAL; - fabric_hints_->domain_attr->data_progress = FI_PROGRESS_MANUAL; - - // Enable thread safe mode Does not work with psm2 provider - fabric_hints_->domain_attr->threading = FI_THREAD_SAFE; - - // Enable resource management - fabric_hints_->domain_attr->resource_mgmt = FI_RM_ENABLED; - -#ifdef HPX_PARCELPORT_LIBFABRIC_ENDPOINT_RDM - LOG_DEBUG_MSG("Selecting endpoint type RDM"); - fabric_hints_->ep_attr->type = FI_EP_RDM; -#else - // we will use a shared receive context for active endpoints - fabric_hints_->ep_attr->rx_ctx_cnt = FI_SHARED_CONTEXT; - - if (endpoint_type == "msg") - { - fabric_hints_->ep_attr->type = FI_EP_MSG; - } - else if (endpoint_type == "rdm") - { - fabric_hints_->ep_attr->type = FI_EP_RDM; - } - else if (endpoint_type == "dgram") - { - fabric_hints_->ep_attr->type = FI_EP_DGRAM; - } - else - { - LOG_DEBUG_MSG("endpoint type not set, using RDM"); - fabric_hints_->ep_attr->type = FI_EP_RDM; - } -#endif - - // by default, we will always want completions on both tx/rx events - fabric_hints_->tx_attr->op_flags = FI_COMPLETION; - fabric_hints_->rx_attr->op_flags = FI_COMPLETION; - - uint64_t flags = 0; - LOG_DEBUG_MSG("Getting initial info about fabric"); - int ret = fi_getinfo(FI_VERSION(1, 4), nullptr, nullptr, flags, - fabric_hints_, &fabric_info_); - if (ret) - { - throw fabric_error(ret, "Failed to get fabric info"); - } - LOG_DEBUG_MSG( - "Fabric info " << fi_tostr(fabric_info_, FI_TYPE_INFO)); - - immediate_ = (fabric_info_->rx_attr->mode & FI_RX_CQ_DATA) != 0; - LOG_DEBUG_MSG("Fabric supports immediate data " << immediate_); - LOG_EXCLUSIVE( - bool context = (fabric_hints_->mode & FI_CONTEXT) != 0); - LOG_DEBUG_MSG("Fabric requires FI_CONTEXT " << context); - - LOG_DEBUG_MSG("Creating fabric object"); - ret = fi_fabric(fabric_info_->fabric_attr, &fabric_, nullptr); - if (ret) - { - throw fabric_error(ret, "Failed to get fi_fabric"); - } - - // Allocate a domain. - LOG_DEBUG_MSG("Allocating domain "); - ret = fi_domain(fabric_, fabric_info_, &fabric_domain_, nullptr); - if (ret) - throw fabric_error(ret, "fi_domain"); - - // Cray specific. Disable memory registration cache - _set_disable_registration(); - - fi_freeinfo(fabric_hints_); - FUNC_END_DEBUG_MSG; - } - - // ------------------------------------------------------------------- - // create endpoint and get ready for possible communications - void startup(parcelport* pp) - { - FUNC_START_DEBUG_MSG; - // -#ifdef HPX_PARCELPORT_LIBFABRIC_ENDPOINT_RDM - bind_endpoint_to_queues(ep_active_); -#else - bind_passive_endpoint_to_queues(ep_passive_); - fabric_info_->handle = &(ep_passive_->fid); - - LOG_DEBUG_MSG("Creating active endpoint"); - new_endpoint_active(fabric_info_, &ep_active_); - LOG_DEBUG_MSG("active endpoint " << hexpointer(ep_active_)); - - bind_endpoint_to_queues(ep_active_); -#endif - - // filling our vector of receivers... - std::size_t num_receivers = HPX_PARCELPORT_LIBFABRIC_MAX_PREPOSTS; - receivers_.reserve(num_receivers); - for (std::size_t i = 0; i != num_receivers; ++i) - { - receivers_.emplace_back(pp, ep_active_, *memory_pool_); - } - } - - // -------------------------------------------------------------------- - // Special GNI extensions to disable memory registration cache - - // this helper function only works for string ops - void _set_check_domain_op_value(int op, const char* value) - { -#ifdef HPX_PARCELPORT_LIBFABRIC_GNI - int ret; - struct fi_gni_ops_domain* gni_domain_ops; - char* get_val; - - ret = fi_open_ops(&fabric_domain_->fid, FI_GNI_DOMAIN_OPS_1, 0, - (void**) &gni_domain_ops, nullptr); - if (ret) - throw fabric_error(ret, "fi_open_ops"); - LOG_DEBUG_MSG("domain ops returned " << hexpointer(gni_domain_ops)); - - // different versions of clang-format disagree - // clang-format off - ret = gni_domain_ops->set_val( - &fabric_domain_->fid, (dom_ops_val_t) (op), &value); - if (ret) - throw fabric_error(ret, "set val (ops)"); - - ret = gni_domain_ops->get_val( - &fabric_domain_->fid, (dom_ops_val_t) (op), &get_val); - // clang-format on - - LOG_DEBUG_MSG("Cache mode set to " << get_val); - if (std::string(value) != std::string(get_val)) - throw fabric_error(ret, "get val"); -#else - (void) op; - (void) value; -#endif - } - - void _set_disable_registration() - { -#ifdef HPX_PARCELPORT_LIBFABRIC_GNI - _set_check_domain_op_value(GNI_MR_CACHE, "none"); -#endif - } - - // ------------------------------------------------------------------- - void create_event_queue() - { - LOG_DEBUG_MSG("Creating event queue"); - fi_eq_attr eq_attr = {}; - eq_attr.wait_obj = FI_WAIT_NONE; - int ret = fi_eq_open(fabric_, &eq_attr, &event_queue_, nullptr); - if (ret) - throw fabric_error(ret, "fi_eq_open"); - - if (fabric_info_->ep_attr->type == FI_EP_MSG) - { - LOG_DEBUG_MSG("Binding event queue to passive endpoint"); - ret = fi_pep_bind(ep_passive_, &event_queue_->fid, 0); - if (ret) - throw fabric_error(ret, "fi_pep_bind"); - - LOG_DEBUG_MSG("Passive endpoint : listen"); - ret = fi_listen(ep_passive_); - if (ret) - throw fabric_error(ret, "fi_listen"); - - LOG_DEBUG_MSG("Allocating shared receive context"); - ret = fi_srx_context(fabric_domain_, fabric_info_->rx_attr, - &ep_shared_rx_cxt_, nullptr); - if (ret) - throw fabric_error(ret, "fi_srx_context"); - } - FUNC_END_DEBUG_MSG; - } - - // -------------------------------------------------------------------- - locality create_local_endpoint() - { - struct fid* id; - int ret; -#ifdef HPX_PARCELPORT_LIBFABRIC_ENDPOINT_RDM - LOG_DEBUG_MSG("Creating active endpoint"); - new_endpoint_active(fabric_info_, &ep_active_); - LOG_DEBUG_MSG("active endpoint " << hexpointer(ep_active_)); - id = &ep_active_->fid; -#else - LOG_DEBUG_MSG("Creating passive endpoint"); - ret = fi_passive_ep(fabric_, fabric_info_, &ep_passive_, nullptr); - if (ret) - { - throw fabric_error(ret, "Failed to create fi_passive_ep"); - } - LOG_DEBUG_MSG("passive endpoint " << hexpointer(ep_passive_)); - id = &ep_passive_->fid; -#endif - -#ifdef HPX_HAVE_PARCELPORT_TCP - // with tcp we do not use PMI boot, so enable the endpoint now - LOG_DEBUG_MSG("Enabling endpoint (TCP) " << hexpointer(ep_active_)); - ret = fi_enable(ep_active_); - if (ret) - throw fabric_error(ret, "fi_enable"); -#endif - - locality::locality_data local_addr; - std::size_t addrlen = locality::array_size; - LOG_DEBUG_MSG( - "Fetching local address using size " << decnumber(addrlen)); - ret = fi_getname(id, local_addr.data(), &addrlen); - if (ret || (addrlen > locality::array_size)) - { - fabric_error(ret, "fi_getname - size error or other problem"); - } - - LOG_EXCLUSIVE({ - std::stringstream temp1; - for (std::size_t i = 0; i < locality::array_length; ++i) - { - temp1 << ipaddress(local_addr[i]); - } - LOG_DEBUG_MSG("address info is " << temp1.str().c_str()); - std::stringstream temp2; - for (std::size_t i = 0; i < locality::array_length; ++i) - { - temp2 << hexuint32(local_addr[i]); - } - LOG_DEBUG_MSG("address info is " << temp2.str().c_str()); - }); - FUNC_END_DEBUG_MSG; - return locality(local_addr); - } - - // -------------------------------------------------------------------- - void new_endpoint_active( - struct fi_info* info, struct fid_ep** new_endpoint) - { - FUNC_START_DEBUG_MSG; - // create an 'active' endpoint that can be used for sending/receiving - LOG_DEBUG_MSG("Creating active endpoint"); - LOG_DEBUG_MSG( - "Got info mode " << (info->mode & FI_NOTIFY_FLAGS_ONLY)); - int ret = fi_endpoint(fabric_domain_, info, new_endpoint, nullptr); - if (ret) - throw fabric_error(ret, "fi_endpoint"); - - if (info->ep_attr->type == FI_EP_MSG) - { - if (event_queue_) - { - LOG_DEBUG_MSG("Binding endpoint to EQ"); - ret = fi_ep_bind(*new_endpoint, &event_queue_->fid, 0); - if (ret) - throw fabric_error(ret, "bind event_queue_"); - } - } - } - - // -------------------------------------------------------------------- - void bind_endpoint_to_queues(struct fid_ep* endpoint) - { - int ret; - if (av_) - { - LOG_DEBUG_MSG("Binding endpoint to AV"); - ret = fi_ep_bind(endpoint, &av_->fid, 0); - if (ret) - throw fabric_error(ret, "bind event_queue_"); - } - - if (txcq_) - { - LOG_DEBUG_MSG("Binding endpoint to TX CQ"); - ret = fi_ep_bind(endpoint, &txcq_->fid, FI_TRANSMIT); - if (ret) - throw fabric_error(ret, "bind txcq"); - } - - if (rxcq_) - { - LOG_DEBUG_MSG("Binding endpoint to RX CQ"); - ret = fi_ep_bind(endpoint, &rxcq_->fid, FI_RECV); - if (ret) - throw fabric_error(ret, "rxcq"); - } - - if (ep_shared_rx_cxt_) - { - LOG_DEBUG_MSG("Binding endpoint to shared receive context"); - ret = fi_ep_bind(endpoint, &ep_shared_rx_cxt_->fid, 0); - if (ret) - throw fabric_error(ret, "ep_shared_rx_cxt_"); - } - - LOG_DEBUG_MSG("Enabling endpoint " << hexpointer(endpoint)); - ret = fi_enable(endpoint); - if (ret) - throw fabric_error(ret, "fi_enable"); - - FUNC_END_DEBUG_MSG; - } - - void bind_passive_endpoint_to_queues(struct fid_pep* endpoint) - { - int ret; - if (av_) - { - LOG_DEBUG_MSG("Binding passive endpoint to AV"); - ret = fi_pep_bind(endpoint, &av_->fid, 0); - if (ret) - throw fabric_error(ret, "bind event_queue_"); - } - - if (txcq_) - { - LOG_DEBUG_MSG("Binding passive endpoint to TX CQ"); - ret = fi_pep_bind(endpoint, &txcq_->fid, FI_TRANSMIT); - if (ret) - throw fabric_error(ret, "bind txcq"); - } - - if (rxcq_) - { - LOG_DEBUG_MSG("Binding passive endpoint to RX CQ"); - ret = fi_pep_bind(endpoint, &rxcq_->fid, FI_RECV); - if (ret) - throw fabric_error(ret, "rxcq"); - } - - if (ep_shared_rx_cxt_) - { - LOG_DEBUG_MSG( - "Binding passive endpoint to shared receive context"); - ret = fi_pep_bind(endpoint, &ep_shared_rx_cxt_->fid, 0); - if (ret) - throw fabric_error(ret, "ep_shared_rx_cxt_"); - } - - FUNC_END_DEBUG_MSG; - } - - // -------------------------------------------------------------------- - void initialize_localities() - { - FUNC_START_DEBUG_MSG; -#if !defined(HPX_PARCELPORT_LIBFABRIC_HAVE_PMI) - std::uint32_t N = hpx::get_config().get_num_localities(); - LOG_DEBUG_MSG( - "Parcelport initialize_localities with " << N << " localities"); - - // make sure address vector is created - create_completion_queues(fabric_info_, N); - - for (std::uint32_t i = 0; i < N; ++i) - { - hpx::naming::gid_type l = - hpx::naming::get_gid_from_locality_id(i); - LOG_DEBUG_MSG("Resolving locality" << l); - // each locality may be reachable by mutiplte parcelports - const parcelset::endpoints_type& res = - agas::resolve_locality(l); - // get the fabric related data - auto it = res.find("libfabric"); - LOG_DEBUG_MSG("locality resolution " << it->first << " => " - << it->second); - const hpx::parcelset::locality& fabric_locality = it->second; - const locality& loc = fabric_locality.get(); - // put the provide specific data into the address vector - // so that we can look it up later - /*fi_addr_t dummy =*/insert_address(loc); - } -#endif - LOG_DEBUG_MSG("Done getting localities "); - FUNC_END_DEBUG_MSG; - } - - // -------------------------------------------------------------------- - fi_addr_t get_fabric_address(const locality& dest_fabric) - { - uint64_t key = - (uint64_t) dest_fabric.ip_address() << 32 | dest_fabric.port(); - return endpoint_av_.find(key)->second; - } - - // -------------------------------------------------------------------- - const locality& here() const - { - return here_; - } - - // -------------------------------------------------------------------- - const bool& immedate_data_supported() const - { - return immediate_; - } - - // -------------------------------------------------------------------- - // returns true when all connections have been disconnected and none are active - bool isTerminated() - { - return false; - //return (qp_endpoint_map_.size() == 0); - } - - // types we need for connection and disconnection callback functions - // into the main parcelport code. - typedef hpx::function - ConnectionFunction; - typedef hpx::function - DisconnectionFunction; - - // -------------------------------------------------------------------- - // Set a callback which will be called immediately after - // RDMA_CM_EVENT_ESTABLISHED has been received. - // This should be used to initialize all structures for handling a new connection - void setConnectionFunction(ConnectionFunction f) - { - connection_function_ = f; - } - - // -------------------------------------------------------------------- - // currently not used. - void setDisconnectionFunction(DisconnectionFunction f) - { - disconnection_function_ = f; - } - - // -------------------------------------------------------------------- - // This is the main polling function that checks for work completions - // and connection manager events, if stopped is true, then completions - // are thrown away, otherwise the completion callback is triggered - int poll_endpoints(bool stopped = false) - { - int work = poll_for_work_completions(); - -#ifdef HPX_PARCELPORT_LIBFABRIC_ENDPOINT_MSG - work += poll_event_queue(stopped); -#endif - return work; - } - - // -------------------------------------------------------------------- - int poll_for_work_completions() - { - // @TODO, disable polling until queues are initialized to avoid this check - // if queues are not setup, don't poll - if (HPX_UNLIKELY(!rxcq_)) - return 0; - // - return poll_send_queue() + poll_recv_queue(); - } - - // -------------------------------------------------------------------- - int poll_send_queue() - { - LOG_TIMED_INIT(poll); - LOG_TIMED_BLOCK( - poll, DEVEL, 5.0, { LOG_DEBUG_MSG("poll_send_queue"); }); - - fi_cq_msg_entry entry; - int ret = 0; - { - std::unique_lock l( - polling_mutex_, std::try_to_lock); - if (l) - ret = fi_cq_read(txcq_, &entry, 1); - } - if (ret > 0) - { - LOG_DEBUG_MSG("Completion txcq wr_id " - << fi_tostr(&entry.flags, FI_TYPE_OP_FLAGS) << " (" - << decnumber(entry.flags) << ") " - << "context " << hexpointer(entry.op_context) << "length " - << hexuint32(entry.len)); - if (entry.flags & FI_RMA) - { - LOG_DEBUG_MSG("Received a txcq RMA completion " - << "Context " << hexpointer(entry.op_context)); - rma_receiver* rcv = - reinterpret_cast(entry.op_context); - rcv->handle_rma_read_completion(); - } - else if (entry.flags == (FI_MSG | FI_SEND)) - { - LOG_DEBUG_MSG("Received a txcq RMA send completion"); - sender* handler = - reinterpret_cast(entry.op_context); - handler->handle_send_completion(); - } - else - { - LOG_DEBUG_MSG( - "$$$$$ Received an unknown txcq completion ***** " - << decnumber(entry.flags)); - std::terminate(); - } - return 1; - } - else if (ret == 0 || ret == -FI_EAGAIN) - { - // do nothing, we will try again on the next check - LOG_TIMED_MSG(poll, DEVEL, 10, "txcq FI_EAGAIN"); - } - else if (ret == -FI_EAVAIL) - { - struct fi_cq_err_entry e = {}; - int err_sz = fi_cq_readerr(txcq_, &e, 0); - // from the manpage 'man 3 fi_cq_readerr' - // - // On error, a negative value corresponding to - // 'fabric errno' is returned - // - if (e.err == err_sz) - { - LOG_ERROR_MSG("txcq_ Error with len " - << hexlength(e.len) << "context " - << hexpointer(e.op_context)); - } - // flags might not be set correctly - if (e.flags == (FI_MSG | FI_SEND)) - { - LOG_ERROR_MSG("txcq Error for FI_SEND with len " - << hexlength(e.len) << "context " - << hexpointer(e.op_context)); - } - if (e.flags & FI_RMA) - { - LOG_ERROR_MSG("txcq Error for FI_RMA with len " - << hexlength(e.len) << "context " - << hexpointer(e.op_context)); - } - rma_base* base = reinterpret_cast(e.op_context); - base->handle_error(e); - } - else - { - LOG_ERROR_MSG("unknown error in completion txcq read"); - } - return 0; - } - - // -------------------------------------------------------------------- - int poll_recv_queue() - { - LOG_TIMED_INIT(poll); - LOG_TIMED_BLOCK( - poll, DEVEL, 5.0, { LOG_DEBUG_MSG("poll_recv_queue"); }); - - int result = 0; - fi_addr_t src_addr; - fi_cq_msg_entry entry; - - // receives will use fi_cq_readfrom as we want the source address - int ret = 0; - { - std::unique_lock l( - polling_mutex_, std::try_to_lock); - if (l) - ret = fi_cq_readfrom(rxcq_, &entry, 1, &src_addr); - } - if (ret > 0) - { - LOG_DEBUG_MSG("Completion rxcq wr_id " - << fi_tostr(&entry.flags, FI_TYPE_OP_FLAGS) << " (" - << decnumber(entry.flags) << ") " - << "source " << hexpointer(src_addr) << "context " - << hexpointer(entry.op_context) << "length " - << hexuint32(entry.len)); - if (src_addr == FI_ADDR_NOTAVAIL) - { - LOG_DEBUG_MSG("Source address not available...\n"); - std::terminate(); - } - // if ((entry.flags & FI_RMA) == FI_RMA) { - // LOG_DEBUG_MSG("Received an rxcq RMA completion"); - // } - else if (entry.flags == (FI_MSG | FI_RECV)) - { - LOG_DEBUG_MSG("Received an rxcq recv completion " - << hexpointer(entry.op_context)); - reinterpret_cast(entry.op_context) - ->handle_recv(src_addr, entry.len); - } - else - { - LOG_DEBUG_MSG("Received an unknown rxcq completion " - << decnumber(entry.flags)); - std::terminate(); - } - result = 1; - } - else if (ret == 0 || ret == -FI_EAGAIN) - { - // do nothing, we will try again on the next check - LOG_TIMED_MSG(poll, DEVEL, 10, "rxcq FI_EAGAIN"); - } - else if (ret == -FI_EAVAIL) - { - struct fi_cq_err_entry e = {}; - int err_sz = fi_cq_readerr(rxcq_, &e, 0); - // from the manpage 'man 3 fi_cq_readerr' - // - // On error, a negative value corresponding to - // 'fabric errno' is returned - // - if (e.err == err_sz) - { - LOG_ERROR_MSG("txcq_ Error with len " - << hexlength(e.len) << "context " - << hexpointer(e.op_context)); - } - LOG_ERROR_MSG("rxcq Error with flags " - << hexlength(e.flags) << "len " << hexlength(e.len)); - } - else - { - LOG_ERROR_MSG("unknown error in completion rxcq read"); - } - return result; - } - - // -------------------------------------------------------------------- - int poll_event_queue(bool /*stopped*/ = false) - { - LOG_TIMED_INIT(poll); - LOG_TIMED_BLOCK(poll, DEVEL, 5.0, - { LOG_DEBUG_MSG("Polling event completion channel"); }) - struct fi_eq_cm_entry* cm_entry; - // struct fi_eq_entry *entry; - struct fid_ep* new_ep; - // uint32_t *addr; - uint32_t event; - std::array buffer; - ssize_t rd = fi_eq_read( - event_queue_, &event, buffer.data(), sizeof(buffer), 0); - if (rd > 0) - { - LOG_DEBUG_MSG("fi_eq_cm_entry " - << decnumber(sizeof(fi_eq_cm_entry)) << " fi_eq_entry " - << decnumber(sizeof(fi_eq_entry))); - LOG_DEBUG_MSG( - "got event " << event << " with bytes = " << decnumber(rd)); - switch (event) - { - case FI_CONNREQ: - { - cm_entry = - reinterpret_cast(buffer.data()); - locality::locality_data addressinfo; - std::memcpy(addressinfo.data(), cm_entry->info->dest_addr, - locality::array_size); - locality loc(addressinfo); - LOG_DEBUG_MSG("FI_CONNREQ from " - << ipaddress(loc.ip_address()) << "-> " - << ipaddress(here_.ip_address()) << "( " - << ipaddress(here_.ip_address()) << " )"); - { - auto result = insert_new_future(loc.ip_address()); - // if the insert fails, it means we have a connection - // already in progress, reject if we are a lower ip address - if (!result.first && - loc.ip_address() > here_.ip_address()) - { - LOG_DEBUG_MSG("FI_CONNREQ priority fi_reject " - << ipaddress(loc.ip_address()) << "-> " - << ipaddress(here_.ip_address()) << "( " - << ipaddress(here_.ip_address()) << " )"); - // int ret = fi_reject(ep_passive_, cm_entry->info->handle, - // nullptr, 0); - // if (ret) { - // throw fabric_error(ret, "new_ep fi_reject failed"); - // } - fi_freeinfo(cm_entry->info); - return 0; - } - // create a new endpoint for this request and accept it - new_endpoint_active(cm_entry->info, &new_ep); - LOG_DEBUG_MSG("Calling fi_accept " - << ipaddress(loc.ip_address()) << "-> " - << ipaddress(here_.ip_address()) << "( " - << ipaddress(here_.ip_address()) << " )"); - int ret = fi_accept( - new_ep, &here_.ip_address(), sizeof(uint32_t)); - if (ret) - throw fabric_error(ret, "new_ep fi_accept failed"); - } - fi_freeinfo(cm_entry->info); - break; - } - case FI_CONNECTED: - { - cm_entry = - reinterpret_cast(buffer.data()); - new_ep = container_of(cm_entry->fid, struct fid_ep, fid); - locality::locality_data address; - std::size_t len = sizeof(locality::locality_data); - fi_getpeer(new_ep, address.data(), &len); - // - auto present1 = endpoint_tmp_.is_in_map(address[1]); - if (!present1.second) - { - throw fabric_error( - 0, "FI_CONNECTED, endpoint map error"); - } - LOG_DEBUG_MSG("FI_CONNECTED " - << hexpointer(new_ep) - << ipaddress(locality::ip_address(address)) << "<> " - << ipaddress(here_.ip_address()) << "( " - << ipaddress(here_.ip_address()) << " )"); - - // call parcelport connection function before setting future - connection_function_(new_ep, address[1]); - - // if there is an entry for a locally started connection on this IP - // then set the future ready with the verbs endpoint - LOG_DEBUG_MSG("FI_CONNECTED setting future " - << ipaddress(locality::ip_address(address)) << "<> " - << ipaddress(here_.ip_address()) << "( " - << ipaddress(here_.ip_address()) << " )"); - - std::get<0>(endpoint_tmp_.find(address[1])->second) - .set_value(new_ep); - - // once the future is set, the entry can be removed? - // endpoint_tmp_.erase(present1.first); - } - break; - case FI_NOTIFY: - LOG_DEBUG_MSG("Got FI_NOTIFY"); - break; - case FI_SHUTDOWN: - LOG_DEBUG_MSG("Got FI_SHUTDOWN"); - break; - case FI_MR_COMPLETE: - LOG_DEBUG_MSG("Got FI_MR_COMPLETE"); - break; - case FI_AV_COMPLETE: - LOG_DEBUG_MSG("Got FI_AV_COMPLETE"); - break; - case FI_JOIN_COMPLETE: - LOG_DEBUG_MSG("Got FI_JOIN_COMPLETE"); - break; - } - // HPX_ASSERT(rd == sizeof(struct fi_eq_cm_entry)); - // HPX_ASSERT(cm_entry->fid == event_queue_->fid); - } - else - { - LOG_TIMED_MSG( - poll, DEVEL, 5, "We did not get an event completion") - } - return 0; - } - - // -------------------------------------------------------------------- - inline struct fid_domain* get_domain() - { - return fabric_domain_; - } - - // -------------------------------------------------------------------- - inline rma_memory_pool& get_memory_pool() - { - return *memory_pool_; - } - - // -------------------------------------------------------------------- - void create_completion_queues(struct fi_info* info, int N) - { - FUNC_START_DEBUG_MSG; - - // only one thread must be allowed to create queues, - // and it is only required once - scoped_lock lock(initialization_mutex_); - if (txcq_ != nullptr || rxcq_ != nullptr || av_ != nullptr) - { - return; - } - - int ret; - - fi_cq_attr cq_attr = {}; - // @TODO - why do we check this - // if (cq_attr.format == FI_CQ_FORMAT_UNSPEC) { - LOG_DEBUG_MSG("Setting CQ attribute to FI_CQ_FORMAT_MSG"); - cq_attr.format = FI_CQ_FORMAT_MSG; - // } - - // open completion queue on fabric domain and set context ptr to tx queue - cq_attr.wait_obj = FI_WAIT_NONE; - cq_attr.size = info->tx_attr->size; - info->tx_attr->op_flags |= FI_COMPLETION; - cq_attr.flags = 0; //|= FI_COMPLETION; - LOG_DEBUG_MSG( - "Creating CQ with tx size " << decnumber(info->tx_attr->size)); - ret = fi_cq_open(fabric_domain_, &cq_attr, &txcq_, &txcq_); - if (ret) - throw fabric_error(ret, "fi_cq_open"); - - // open completion queue on fabric domain and set context ptr to rx queue - cq_attr.size = info->rx_attr->size; - LOG_DEBUG_MSG( - "Creating CQ with rx size " << decnumber(info->rx_attr->size)); - ret = fi_cq_open(fabric_domain_, &cq_attr, &rxcq_, &rxcq_); - if (ret) - throw fabric_error(ret, "fi_cq_open"); - - fi_av_attr av_attr = {}; - if (info->ep_attr->type == FI_EP_RDM || - info->ep_attr->type == FI_EP_DGRAM) - { - if (info->domain_attr->av_type != FI_AV_UNSPEC) - av_attr.type = info->domain_attr->av_type; - else - { - LOG_DEBUG_MSG("Setting map type to FI_AV_MAP"); - av_attr.type = FI_AV_MAP; - av_attr.count = N; - } - - LOG_DEBUG_MSG("Creating address vector "); - ret = fi_av_open(fabric_domain_, &av_attr, &av_, nullptr); - if (ret) - throw fabric_error(ret, "fi_av_open"); - } - FUNC_END_DEBUG_MSG; - } - - // -------------------------------------------------------------------- - std::pair> insert_new_future( - uint32_t remote_ip) - { - LOG_DEBUG_MSG("insert_new_future : Obsolete in RDM mode"); - std::terminate(); - - LOG_DEBUG_MSG("Inserting future in map " - << ipaddress(here_.ip_address()) << "-> " - << ipaddress(remote_ip) << "( " << ipaddress(here_.ip_address()) - << " )"); - - // - hpx::promise new_endpoint_promise; - hpx::future new_endpoint_future = - new_endpoint_promise.get_future(); - // - auto fp_pair = std::make_pair(remote_ip, - std::make_tuple(HPX_MOVE(new_endpoint_promise), - HPX_MOVE(new_endpoint_future))); - // - auto it = endpoint_tmp_.insert(HPX_MOVE(fp_pair)); - // if the insert failed, we must safely delete the future/promise - if (!it.second) - { - LOG_DEBUG_MSG("Must safely delete promise"); - } - - // get the future that was inserted or already present - // the future will become ready when remote end accepts/rejects connection - // or we accept a connection from a remote - hpx::shared_future result = - std::get<1>(it.first->second); - - // if the insert fails due to a duplicate value, return the duplicate - if (!it.second) - { - return std::make_pair(false, result); - } - return std::make_pair(true, result); - } - - // -------------------------------------------------------------------- - fi_addr_t insert_address(const locality& remote) - { - FUNC_START_DEBUG_MSG; - LOG_DEBUG_MSG("inserting address in vector " - << ipaddress(remote.ip_address())); - fi_addr_t result = 0xffffffff; - int ret = - fi_av_insert(av_, remote.fabric_data(), 1, &result, 0, nullptr); - if (ret < 0) - { - fabric_error(ret, "fi_av_insert"); - } - else if (ret != 1) - { - fabric_error(ret, "fi_av_insert did not return 1"); - } - uint64_t key = (uint64_t) remote.ip_address() << 32 | remote.port(); - endpoint_av_.insert(std::make_pair(key, result)); - LOG_DEBUG_MSG("Address inserted in vector " - << ipaddress(remote.ip_address()) << ":" << remote.port() - << hexuint64(result)); - FUNC_END_DEBUG_MSG; - return result; - } - - // -------------------------------------------------------------------- - hpx::shared_future connect_to_server( - const locality& remote) - { - LOG_DEBUG_MSG("connect_to_server : Obsolete in RDM mode"); - std::terminate(); - - const uint32_t& remote_ip = remote.ip_address(); - - // Has a connection been started from here already? - // Note: The future must be created before we call fi_connect - // otherwise a connection may complete before the future is setup - auto connection = insert_new_future(remote_ip); - - // if a connection is already underway, just return the future - if (!connection.first) - { - LOG_DEBUG_MSG("connect to server : returning existing future"); - // the future will become ready when the remote end accepts/rejects - // our connection - or we accept a connection from a remote - return connection.second; - } - - // for thread safety, make a copy of the fi_info before setting - // the address in it. fi_freeinfo will free the dest_addr field. - struct fi_info* new_info = fi_dupinfo(fabric_info_); - new_info->dest_addrlen = locality::array_size; - new_info->dest_addr = malloc(locality::array_size); - std::memcpy(new_info->dest_addr, remote.fabric_data(), - locality::array_size); - - uint64_t flags = 0; - struct fi_info* fabric_info_active_; - int ret = fi_getinfo(FI_VERSION(1, 4), nullptr, nullptr, flags, - new_info, &fabric_info_active_); - if (ret) - throw fabric_error(ret, "fi_getinfo"); - - LOG_DEBUG_MSG("New connection for IP address " - << ipaddress(remote.ip_address()) << "Fabric info " - << fi_tostr(fabric_info_active_, FI_TYPE_INFO)); - create_completion_queues(fabric_info_active_, 0); - - fid_ep* new_endpoint; - new_endpoint_active(fabric_info_active_, &new_endpoint); - - // now it is safe to call connect - LOG_DEBUG_MSG("Calling fi_connect from " - << ipaddress(here_.ip_address()) << "-> " - << ipaddress(remote.ip_address()) << "( " - << ipaddress(here_.ip_address()) << " )"); - - ret = fi_connect(new_endpoint, remote.fabric_data(), nullptr, 0); - if (ret) - throw fabric_error(ret, "fi_connect"); - - LOG_DEBUG_MSG("Deleting new endpoint info structure"); - fi_freeinfo(fabric_info_active_); - fi_freeinfo(new_info); - - return connection.second; - } - - void disconnect_all() {} - - bool active() - { - return false; - } - - private: - // store info about local device - std::string device_; - std::string interface_; - sockaddr_in local_addr_; - - // callback functions used for connection event handling - ConnectionFunction connection_function_; - DisconnectionFunction disconnection_function_; - - // Pinned memory pool used for allocating buffers - std::unique_ptr> - memory_pool_; - - // Shared completion queue for all endoints - // Count outstanding receives posted to SRQ + Completion queue - std::vector receivers_; - - // only allow one thread to handle connect/disconnect events etc - mutex_type initialization_mutex_; - mutex_type endpoint_map_mutex_; - mutex_type polling_mutex_; - - // used to skip polling event channel too frequently - typedef std::chrono::time_point time_type; - time_type event_check_time_; - uint32_t event_pause_; - }; - - // Smart pointer for libfabric_controller obje - typedef std::shared_ptr libfabric_controller_ptr; - -}}}} // namespace hpx::parcelset::policies::libfabric diff --git a/libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/libfabric_region_provider.hpp b/libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/libfabric_region_provider.hpp deleted file mode 100644 index c42abdce8816..000000000000 --- a/libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/libfabric_region_provider.hpp +++ /dev/null @@ -1,47 +0,0 @@ -// Copyright (c) 2015-2016 John Biddiscombe -// -// SPDX-License-Identifier: BSL-1.0 -// Distributed under the Boost Software License, Version 1.0. (See accompanying -// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - -#pragma once - -#include -#include - -#include -#include -#include -#include -#include -#include -// -#include -#include - -namespace hpx { namespace parcelset { namespace policies { namespace libfabric { - struct libfabric_region_provider - { - // The internal memory region handle - typedef struct fid_mr provider_region; - typedef struct fid_domain provider_domain; - - template - static int register_memory(Args&&... args) - { - return fi_mr_reg(HPX_FORWARD(Args, args)...); - } - - static int unregister_memory(provider_region* region) - { - return fi_close(®ion->fid); - } - - static int flags() - { - return FI_READ | FI_WRITE | FI_RECV | FI_SEND | FI_REMOTE_READ | - FI_REMOTE_WRITE; - } - }; - -}}}} // namespace hpx::parcelset::policies::libfabric diff --git a/libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/locality.hpp b/libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/locality.hpp deleted file mode 100644 index 060a80acabc6..000000000000 --- a/libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/locality.hpp +++ /dev/null @@ -1,213 +0,0 @@ -// Copyright (c) 2015 John Biddiscombe -// -// SPDX-License-Identifier: BSL-1.0 -// Distributed under the Boost Software License, Version 1.0. (See accompanying -// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - -#pragma once - -#include -#include - -#include -#include -// -#include -#include -#include -#include -#include - -// Different providers use different address formats that we must accommodate -// in our locality object. -#ifdef HPX_PARCELPORT_LIBFABRIC_GNI -#define HPX_PARCELPORT_LIBFABRIC_LOCALITY_SIZE 48 -#endif - -#if defined(HPX_PARCELPORT_LIBFABRIC_VERBS) || \ - defined(HPX_PARCELPORT_LIBFABRIC_SOCKETS) || \ - defined(HPX_PARCELPORT_LIBFABRIC_PSM2) -#define HPX_PARCELPORT_LIBFABRIC_LOCALITY_SIZE 16 -#define HPX_PARCELPORT_LIBFABRIC_LOCALITY_SOCKADDR -#endif - -//typedef struct fid* fi_addr_t; - -namespace hpx { namespace parcelset { namespace policies { namespace libfabric { - - // -------------------------------------------------------------------- - // Locality, in this structure we store the informartion required by - // libfabric to make a connection to another node. - // With libfabric 1.4.x the array contains the fabric ip address stored - // as the second uint32_t in the array. For this reason we use an - // array of uint32_t rather than uint8_t/char so we can easily access - // the ip for debug/validation purposes - // -------------------------------------------------------------------- - struct locality - { - // the number of 32bit ints stored in our array - static const uint32_t array_length = - HPX_PARCELPORT_LIBFABRIC_LOCALITY_SIZE / 4; - static const uint32_t array_size = - HPX_PARCELPORT_LIBFABRIC_LOCALITY_SIZE; - - // array type of our locality data - typedef std::array locality_data; - - static const char* type() - { - return "libfabric"; - } - - explicit locality(const locality_data& in_data) - { - std::memcpy(&data_[0], &in_data[0], array_size); - fi_address_ = 0; - LOG_DEBUG_MSG("explicit constructing locality from " - << ipaddress(ip_address()) << ":" << decnumber(port())); - } - - locality() - { - std::memset(&data_[0], 0x00, array_size); - fi_address_ = 0; - LOG_DEBUG_MSG("default constructing locality from " - << ipaddress(ip_address()) << ":" << decnumber(port())); - } - - locality(const locality& other) - : data_(other.data_) - , fi_address_(other.fi_address_) - { - ; - LOG_DEBUG_MSG("copy constructing locality with " - << ipaddress(ip_address()) << ":" << decnumber(port())); - } - - locality(locality&& other) - : data_(HPX_MOVE(other.data_)) - , fi_address_(other.fi_address_) - { - LOG_DEBUG_MSG("move constructing locality with " - << ipaddress(ip_address()) << ":" << decnumber(port())); - } - - locality& operator=(const locality& other) - { - data_ = other.data_; - fi_address_ = other.fi_address_; - LOG_DEBUG_MSG("copy operator locality with " - << ipaddress(ip_address()) << ":" << decnumber(port())); - return *this; - } - - const uint32_t& ip_address() const - { -#if defined(HPX_PARCELPORT_LIBFABRIC_LOCALITY_SOCKADDR) - return reinterpret_cast(data_.data()) - ->sin_addr.s_addr; -#elif defined(HPX_PARCELPORT_LIBFABRIC_GNI) - return data_[0]; -#else - throw fabric_error( - 0, "unsupported fabric provider, please fix ASAP"); -#endif - } - - static const uint32_t& ip_address(const locality_data& data) - { -#if defined(HPX_PARCELPORT_LIBFABRIC_LOCALITY_SOCKADDR) - return reinterpret_cast(&data) - ->sin_addr.s_addr; -#elif defined(HPX_PARCELPORT_LIBFABRIC_GNI) - return data[0]; -#else - throw fabric_error( - 0, "unsupported fabric provider, please fix ASAP"); -#endif - } - - fi_addr_t fi_address() const - { - return fi_address_; - } - - void set_fi_address(fi_addr_t fi_addr) - { - fi_address_ = fi_addr; - } - - uint16_t port() const - { - uint16_t port = - 256 * reinterpret_cast(data_.data())[2] + - reinterpret_cast(data_.data())[3]; - return port; - } - - // some condition marking this locality as valid - explicit operator bool() const - { - return (ip_address() != 0); - } - - void save(serialization::output_archive& ar) const - { - ar << data_; - } - - void load(serialization::input_archive& ar) - { - ar >> data_; - } - - const void* fabric_data() const - { - return data_.data(); - } - - char* fabric_data_writable() - { - return reinterpret_cast(data_.data()); - } - - bool valid() - { - return true; - } - - private: - friend bool operator==(locality const& lhs, locality const& rhs) - { -#if defined(HPX_PARCELPORT_LIBFABRIC_HAVE_LOGGING) - uint32_t a1 = lhs.ip_address(); - uint32_t a2 = rhs.ip_address(); - LOG_DEBUG_MSG( - "Testing array equality " << ipaddress(a1) << ipaddress(a2)); -#endif - return (lhs.data_ == rhs.data_); - } - - friend bool operator<(locality const& lhs, locality const& rhs) - { - uint32_t a1 = lhs.ip_address(); - uint32_t a2 = rhs.ip_address(); - return a1 < a2; - } - - friend std::ostream& operator<<(std::ostream& os, locality const& loc) - { - hpx::util::ios_flags_saver ifs(os); - for (uint32_t i = 0; i < array_length; ++i) - { - os << loc.data_[i]; - } - return os; - } - - private: - locality_data data_; - fi_addr_t fi_address_; - }; - -}}}} // namespace hpx::parcelset::policies::libfabric diff --git a/libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/parcelport_libfabric.hpp b/libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/parcelport_libfabric.hpp deleted file mode 100644 index 17caf00eb1c0..000000000000 --- a/libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/parcelport_libfabric.hpp +++ /dev/null @@ -1,281 +0,0 @@ -// Copyright (c) 2015-2016 John Biddiscombe -// -// SPDX-License-Identifier: BSL-1.0 -// Distributed under the Boost Software License, Version 1.0. (See accompanying -// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - -#pragma once - -// config -#include -#include -// util -#include -#include -#include -#include -#include -#include -#include -#include - -// The memory pool specialization need to be pulled in before encode_parcels -#include -#include -#include -#include -// - -// -------------------------------------------------------------------- -// Controls whether we are allowed to suspend threads that are sending -// when we have maxed out the number of sends we can handle -#define HPX_PARCELPORT_LIBFABRIC_SUSPEND_WAKE \ - (HPX_PARCELPORT_LIBFABRIC_THROTTLE_SENDS / 2) - -// -------------------------------------------------------------------- -// Enable the use of hpx small_vector for certain short lived storage -// elements within the parcelport. This can reduce some memory allocations -#define HPX_PARCELPORT_LIBFABRIC_USE_SMALL_VECTOR true - -#define HPX_PARCELPORT_LIBFABRIC_IMM_UNSUPPORTED 1 - -// -------------------------------------------------------------------- -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -// -#if HPX_PARCELPORT_LIBFABRIC_USE_SMALL_VECTOR -#include -#endif -// -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -using namespace hpx::parcelset::policies; - -namespace hpx { namespace parcelset { namespace policies { namespace libfabric { - // -------------------------------------------------------------------- - // parcelport, the implementation of the parcelport itself - // -------------------------------------------------------------------- - struct HPX_EXPORT parcelport : public parcelport_impl - { - private: - typedef parcelport_impl base_type; - - public: - // These are the types used in the parcelport for locking etc - // Note that spinlock is the only supported mutex that works on HPX+OS threads - // and condition_variable_any can be used across HPX/OS threads - typedef hpx::spinlock mutex_type; - typedef hpx::parcelset::policies::libfabric::scoped_lock - scoped_lock; - typedef hpx::parcelset::policies::libfabric::unique_lock - unique_lock; - typedef rma_memory_region region_type; - typedef memory_region_allocator - allocator_type; - - // -------------------------------------------------------------------- - // main vars used to manage the RDMA controller and interface - // These are called from a static function, so use static - // -------------------------------------------------------------------- - libfabric_controller_ptr libfabric_controller_; - - // our local ip address (estimated based on fabric PP address info) - uint32_t ip_addr_; - - // Not currently working, we support bootstrapping, but when not enabled - // we should be able to skip it - bool bootstrap_enabled_; - bool parcelport_enabled_; - - // @TODO, clean up the allocators, buffers, chunk_pool etc so that there is a - // more consistent reuse of classes/types. - // The use of pointer allocators etc is a dreadful hack and needs reworking - - typedef header - header_type; - static constexpr unsigned int header_size = - header_type::header_block_size; - typedef rma_memory_pool memory_pool_type; - typedef pinned_memory_vector - snd_data_type; - typedef parcel_buffer snd_buffer_type; - // when terminating the parcelport, this is used to restrict access - mutex_type stop_mutex; - - hpx::lockfree::stack, - HPX_PARCELPORT_LIBFABRIC_THROTTLE_SENDS, true> - senders_; - - // Used to help with shutdown - std::atomic stopped_; - - memory_pool_type* chunk_pool_; - - // parcelset::gatherer& parcels_sent_; - - // for debugging/performance measurement - performance_counter completions_handled_; - performance_counter senders_in_use_; - - // -------------------------------------------------------------------- - // Constructor : mostly just initializes the superclass with 'here' - // -------------------------------------------------------------------- - parcelport(util::runtime_configuration const& ini, - threads::policies::callback_notifier const& notifier); - - // Start the handling of connections. - bool do_run(); - - // -------------------------------------------------------------------- - // return a sender object back to the parcelport_impl - // this is used by the send_immediate version of parcelport_impl - // -------------------------------------------------------------------- - sender* get_connection( - parcelset::locality const& dest, fi_addr_t& fi_addr); - - void reclaim_connection(sender* s); - - // -------------------------------------------------------------------- - // return a sender object back to the parcelport_impl - // this is for compatibility with non send_immediate operation - // -------------------------------------------------------------------- - std::shared_ptr create_connection( - parcelset::locality const& dest, error_code& ec); - - ~parcelport(); - - /// Should not be used any more as parcelport_impl handles this? - bool can_bootstrap() const; - - /// Return the name of this locality - std::string get_locality_name() const; - - parcelset::locality agas_locality( - util::runtime_configuration const& ini) const; - - parcelset::locality create_locality() const; - - static void suspended_task_debug(const std::string& match); - - void do_stop(); - - // -------------------------------------------------------------------- - bool can_send_immediate(); - - // -------------------------------------------------------------------- - template - bool async_write(Handler&& handler, sender* sender, fi_addr_t addr, - snd_buffer_type& buffer); - - // -------------------------------------------------------------------- - // This is called to poll for completions and handle all incoming messages - // as well as complete outgoing messages. - // -------------------------------------------------------------------- - // Background work - // - // This is called whenever the main thread scheduler is idling, - // is used to poll for events, messages on the libfabric connection - // -------------------------------------------------------------------- - bool background_work( - std::size_t num_thread, parcelport_background_mode mode); - void io_service_work(); - bool background_work_OS_thread(); - }; -}}}} // namespace hpx::parcelset::policies::libfabric - -namespace hpx { namespace traits { - // Inject additional configuration data into the factory registry for this - // type. This information ends up in the system wide configuration database - // under the plugin specific section: - // - // [hpx.parcel.libfabric] - // ... - // priority = 100 - // - template <> - struct plugin_config_data - { - static char const* priority() - { - FUNC_START_DEBUG_MSG; - static int log_init = false; - if (!log_init) - { -#if defined(HPX_PARCELPORT_LIBFABRIC_HAVE_LOGGING) || \ - defined(HPX_PARCELPORT_LIBFABRIC_HAVE_DEV_MODE) - boost::log::add_console_log(std::clog, - // This makes the sink to write log records that look like this: - // 1: A normal severity message - // 2: An error severity message - boost::log::keywords::format = - (boost::log::expressions::stream - << boost::log::expressions::attr( - "LineID") - << ": <" << boost::log::trivial::severity << "> " - << boost::log::expressions::smessage)); - boost::log::add_common_attributes(); -#endif - log_init = true; - } - FUNC_END_DEBUG_MSG; - return "10000"; - } - - // This is used to initialize your parcelport, - // for example check for availability of devices etc. - static void init(int*, char***, util::command_line_handling&) - { - FUNC_START_DEBUG_MSG; -#ifdef HPX_PARCELPORT_LIBFABRIC_HAVE_PMI - cfg.ini_config_.push_back("hpx.parcel.bootstrap!=libfabric"); -#endif - - FUNC_END_DEBUG_MSG; - } - - // by default no additional initialization using the resource - // partitioner is required - static constexpr void init(hpx::resource::partitioner&) noexcept {} - - static void destroy() {} - - static char const* call() - { - FUNC_START_DEBUG_MSG; - FUNC_END_DEBUG_MSG; - // @TODO : check which of these are obsolete after recent changes - return "provider = " - "${HPX_PARCELPORT_LIBFABRIC_" - "PROVIDER:" HPX_PARCELPORT_LIBFABRIC_PROVIDER "}\n" - "domain = " - "${HPX_PARCELPORT_LIBFABRIC_" - "DOMAIN:" HPX_PARCELPORT_LIBFABRIC_DOMAIN "}\n" - "endpoint = " - "${HPX_PARCELPORT_LIBFABRIC_" - "ENDPOINT:" HPX_PARCELPORT_LIBFABRIC_ENDPOINT "}\n"; - } - }; -}} // namespace hpx::traits diff --git a/libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/parcelport_logging.hpp b/libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/parcelport_logging.hpp deleted file mode 100644 index 8abeeb7d7cd3..000000000000 --- a/libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/parcelport_logging.hpp +++ /dev/null @@ -1,287 +0,0 @@ -// Copyright (c) 2014-2017 John Biddiscombe -// -// SPDX-License-Identifier: BSL-1.0 -// Distributed under the Boost Software License, Version 1.0. (See accompanying -// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - -#pragma once - -#include -#include -#include -// -#include -#if defined(__linux) || defined(__linux__) -#include -#else -#define sched_getcpu() '#' -#endif - -#include -#include -#include -#include -#include -#include - -// ------------------------------------------------------------------ -// Set flags to help simplify the log defines -// ------------------------------------------------------------------ - -// full logging support, we want everything -#if defined(HPX_PARCELPORT_LIBFABRIC_HAVE_LOGGING) -#define HPX_PARCELPORT_LOGGING_INCLUDE_FILES -#define HPX_PARCELPORT_LOGGING_HAVE_TRACE_LOG -#define HPX_PARCELPORT_LOGGING_HAVE_DEBUG_LOG -#define HPX_PARCELPORT_LOGGING_HAVE_TIMED_LOG -#define HPX_PARCELPORT_LOGGING_HAVE_DEVEL_LOG - -// just a subset of logging for dev mode enabled -#elif defined(HPX_PARCELPORT_LIBFABRIC_HAVE_DEV_MODE) -#define HPX_PARCELPORT_LOGGING_INCLUDE_FILES -#define HPX_PARCELPORT_LOGGING_HAVE_TIMED_LOG -#define HPX_PARCELPORT_LOGGING_HAVE_DEVEL_LOG -#endif - -// ------------------------------------------------------------------ -// useful macros for formatting log messages -// ------------------------------------------------------------------ -#define nhex(n) \ - "0x" << std::setfill('0') << std::setw(n) << std::noshowbase << std::hex -#define hexpointer(p) nhex(16) << (uintptr_t)(p) << " " -#define hexuint64(p) nhex(16) << (uintptr_t)(p) << " " -#define hexuint32(p) nhex(8) << (uint32_t)(p) << " " -#define hexlength(p) nhex(6) << (uintptr_t)(p) << " " -#define hexnumber(p) nhex(4) << (uintptr_t)(p) << " " -#define hexbyte(p) nhex(2) << static_cast(p) << " " -#define decimal(n) \ - std::setfill('0') << std::setw(n) << std::noshowbase << std::dec -#define decnumber(p) std::dec << p << " " -#define dec4(p) decimal(4) << p << " " -#define ipaddress(p) \ - std::dec << (int) (reinterpret_cast(&p))[0] << "." \ - << (int) (reinterpret_cast(&p))[1] << "." \ - << (int) (reinterpret_cast(&p))[2] << "." \ - << (int) (reinterpret_cast(&p))[3] << " " -#define sockaddress(p) ipaddress(((struct sockaddr_in*) (p))->sin_addr.s_addr) - -// ------------------------------------------------------------------ -// include files needed for boost::log -// ------------------------------------------------------------------ -#ifdef HPX_PARCELPORT_LOGGING_INCLUDE_FILES -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#endif - -#include - -// ------------------------------------------------------------------ -// helper classes/functions used in logging -// ------------------------------------------------------------------ -namespace hpx { namespace parcelset { namespace policies { namespace libfabric { - namespace detail { - - // ------------------------------------------------------------------ - // helper class for printing thread ID - // ------------------------------------------------------------------ - struct rdma_thread_print_helper - { - }; - - inline std::ostream& operator<<( - std::ostream& os, const rdma_thread_print_helper&) - { - if (hpx::threads::get_self_id() == hpx::threads::invalid_thread_id) - { - os << "------------------ "; - } - else - { - hpx::threads::thread_data* dummy = - hpx::threads::get_self_id_data(); - os << hexpointer(dummy); - } - os << nhex(12) << std::this_thread::get_id() << " cpu " - << decnumber(sched_getcpu()); - return os; - } - - // ------------------------------------------------------------------ - // helper function for printing CRC32 - // ------------------------------------------------------------------ - inline uint32_t crc32(const void* address, size_t length) - { - boost::crc_32_type result; - result.process_bytes(address, length); - return result.checksum(); - } - - // ------------------------------------------------------------------ - // helper function for printing CRC32 and short memory dump - // ------------------------------------------------------------------ - inline std::string mem_crc32( - const void* address, size_t length, const char* txt) - { - const uint64_t* uintBuf = static_cast(address); - std::stringstream temp; - temp << "Memory: "; - temp << "address " << hexpointer(address) << "length " - << hexuint32(length) - << "CRC32: " << hexuint32(crc32(address, length)); - for (size_t i = 0; i < (std::min)(length / 8, size_t(128)); i++) - { - temp << hexuint64(*uintBuf++); - } - temp << ": " << txt; - return temp.str(); - } - -}}}}} // namespace hpx::parcelset::policies::libfabric::detail - -#define THREAD_ID \ - "" << hpx::parcelset::policies::libfabric::detail:: \ - rdma_thread_print_helper() - -#define CRC32(buf, len) \ - "" << hpx::parcelset::policies::libfabric::detail::crc32(buf, len) - -#define CRC32_MEM(buf, len, txt) \ - "" << hpx::parcelset::policies::libfabric::detail::mem_crc32(buf, len, txt) - -// ------------------------------------------------------------------ -// Trace messages are enabled for full debug -// ------------------------------------------------------------------ -#ifdef HPX_PARCELPORT_LOGGING_HAVE_TRACE_LOG -#define LOG_TRACE_MSG(x) BOOST_LOG_TRIVIAL(trace) << THREAD_ID << " " << x; -#else -#define LOG_TRACE_MSG(x) -#endif - -// ------------------------------------------------------------------ -// if enabled : define all main logging macros -// ------------------------------------------------------------------ -#ifdef HPX_PARCELPORT_LOGGING_HAVE_DEBUG_LOG - -#define LOG_DEBUG_MSG(x) BOOST_LOG_TRIVIAL(debug) << THREAD_ID << " " << x; -#define LOG_INFO_MSG(x) BOOST_LOG_TRIVIAL(info) << THREAD_ID << " " << x; -#define LOG_WARN_MSG(x) BOOST_LOG_TRIVIAL(warning) << THREAD_ID << " " << x; -#define LOG_ERROR_MSG(x) BOOST_LOG_TRIVIAL(error) << THREAD_ID << " " << x; -#define LOG_FATAL_MSG(x) BOOST_LOG_TRIVIAL(fatal) << THREAD_ID << " " << x; -// -#define LOG_EXCLUSIVE(x) x -// -#define FUNC_START_DEBUG_MSG LOG_TRACE_MSG("*** Enter " << __func__); -#define FUNC_END_DEBUG_MSG LOG_TRACE_MSG("### Exit " << __func__); -// -#define LOG_FORMAT_MSG(x) \ - (dynamic_cast( \ - std::ostringstream().seekp(0, std::ios_base::cur) \ - << x << __FILE__ << " " << std::dec << __LINE__)) \ - .str() - -#else -#define LOG_DEBUG_MSG(x) -#define LOG_INFO_MSG(x) -#define LOG_WARN_MSG(x) -#define LOG_ERROR_MSG(x) \ - std::cout << "00: " << THREAD_ID << " " << x << " " << __FILE__ \ - << " " << std::dec << __LINE__ << std::endl; -#define LOG_FATAL_MSG(x) LOG_ERROR_MSG(x) -// -#define LOG_EXCLUSIVE(x) -// -#define FUNC_START_DEBUG_MSG -#define FUNC_END_DEBUG_MSG -// -#define LOG_FORMAT_MSG(x) "" - -#endif - -// ------------------------------------------------------------------ -// dev logging: just enable the LOG_DEVEL macro to bypass most log output -// but still show some that have been specially marked -// ------------------------------------------------------------------ -#ifdef HPX_PARCELPORT_LOGGING_HAVE_DEVEL_LOG -#define LOG_DEVEL_MSG(x) \ - BOOST_LOG_TRIVIAL(debug) << "" << THREAD_ID << " " << x; -#else -#define LOG_DEVEL_MSG(x) -#endif - -// ------------------------------------------------------------------ -// Timed log macros : used during long loops to avoid excessive output -// only prints the log message every N seconds -// ------------------------------------------------------------------ -#ifdef HPX_PARCELPORT_LOGGING_HAVE_TIMED_LOG - -#define LOG_TIMED_INIT(name) \ - using namespace std::chrono; \ - static time_point log_timed_start_##name = \ - system_clock::now(); - -#define LOG_TIMED_MSG(name, level, delay, x) \ - time_point log_timed_now_##name = system_clock::now(); \ - duration log_timed_elapsed_##name = \ - log_timed_now_##name - log_timed_start_##name; \ - if (log_timed_elapsed_##name.count() > delay) \ - { \ - LOG_DEVEL_MSG(x); \ - log_timed_start_##name = log_timed_now_##name; \ - } - -#define LOG_TIMED_BLOCK(name, level, delay, x) \ - time_point log_timed_now_##name = system_clock::now(); \ - duration log_timed_elapsed_##name = \ - log_timed_now_##name - log_timed_start_##name; \ - if (log_timed_elapsed_##name.count() > delay) \ - { \ - log_timed_start_##name = log_timed_now_##name; \ - x; \ - } - -#else -#define LOG_TIMED_INIT(name) -#define LOG_TIMED_MSG(name, level, delay, x) -#define LOG_TIMED_BLOCK(name, level, delay, x) -#endif - -// ------------------------------------------------------------------ -// Utility to allow automatic printing of enum names in log messages -// -// example of usage -// DEFINE_ENUM_WITH_STRING_CONVERSIONS(test_type, (test1)(test2)(test3)) -// ------------------------------------------------------------------ - -#define X_DEFINE_ENUM_WITH_STRING_CONVERSIONS_TOSTRING_CASE(r, data, elem) \ - case elem: \ - return HPX_PP_STRINGIZE(elem); \ - /**/ - -#define DEFINE_ENUM_WITH_STRING_CONVERSIONS(name, enumerators) \ - enum name \ - { \ - BOOST_PP_SEQ_ENUM(enumerators) \ - }; \ - \ - static const char* ToString(name v) \ - { \ - switch (v) \ - { \ - BOOST_PP_SEQ_FOR_EACH( \ - X_DEFINE_ENUM_WITH_STRING_CONVERSIONS_TOSTRING_CASE, name, \ - enumerators) \ - default: \ - return "[Unknown " HPX_PP_STRINGIZE(name) "]"; \ - } \ - } \ - /**/ diff --git a/libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/performance_counter.hpp b/libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/performance_counter.hpp deleted file mode 100644 index b32762f49002..000000000000 --- a/libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/performance_counter.hpp +++ /dev/null @@ -1,161 +0,0 @@ -// Copyright (C) 2016 John Biddiscombe -// -// SPDX-License-Identifier: BSL-1.0 -// Distributed under the Boost Software License, Version 1.0. (See -// accompanying file LICENSE_1_0.txt or copy at - -#pragma once - -#include -#include -// -#include -#include -#include - -#ifdef HPX_PARCELPORT_LIBFABRIC_HAVE_PERFORMANCE_COUNTERS -#define PERFORMANCE_COUNTER_ENABLED true -#else -#define PERFORMANCE_COUNTER_ENABLED false -#endif - -// -// This class is intended to provide a simple atomic counter that can be used as a -// performance counter, but that can be disabled at compile time so that it -// has no performance cost when not used. It is only to avoid a lot of #ifdef -// statements in user code that we collect everything in here and then provide -// the performance counter that will simply do nothing when disabled - but -// still allow code that uses the counters in arithmetic to compile. -// -namespace hpx { namespace parcelset { - - template ::value>> - struct performance_counter - { - }; - - // -------------------------------------------------------------------- - // specialization for performance counters Enabled - // we provide an atomic that can be incremented or added/subtracted to - template - struct performance_counter - { - performance_counter() - : value_{T()} - { - } - - explicit performance_counter(const T& init) - : value_{init} - { - } - - inline operator T() const - { - return value_; - } - - inline T operator=(const T& x) - { - return value_ = x; - } - - inline T operator++() - { - return ++value_; - } - - inline T operator++(int x) - { - return (value_ += x); - } - - inline T operator+=(const T& rhs) - { - return (value_ += rhs); - } - - inline T operator--() - { - return --value_; - } - - inline T operator--(int x) - { - return (value_ -= x); - } - - inline T operator-=(const T& rhs) - { - return (value_ -= rhs); - } - - friend std::ostream& operator<<( - std::ostream& os, const performance_counter& x) - { - os << x.value_; - return os; - } - - std::atomic value_; - }; - - // -------------------------------------------------------------------- - // specialization for performance counters Disabled - // just return dummy values so that arithmetic operations compile ok - template - struct performance_counter - { - constexpr performance_counter() = default; - - explicit constexpr performance_counter(const T&) {} - - inline constexpr operator T() const - { - return 0; - } - - inline constexpr T operator=(const T&) - { - return 0; - } - - inline constexpr T operator++() - { - return 0; - } - - inline constexpr T operator++(int) - { - return 0; - } - - inline constexpr T operator+=(const T&) - { - return 0; - } - - inline constexpr T operator--() - { - return 0; - } - - inline constexpr T operator--(int) - { - return 0; - } - - inline constexpr T operator-=(const T&) - { - return 0; - } - - friend std::ostream& operator<<( - std::ostream& os, const performance_counter&) - { - os << "undefined"; - return os; - } - }; -}} // namespace hpx::parcelset diff --git a/libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/pinned_memory_vector.hpp b/libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/pinned_memory_vector.hpp deleted file mode 100644 index 0f6688e95b5a..000000000000 --- a/libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/pinned_memory_vector.hpp +++ /dev/null @@ -1,238 +0,0 @@ -// Copyright (c) 2015 John Biddiscombe -// -// SPDX-License-Identifier: BSL-1.0 -// Distributed under the Boost Software License, Version 1.0. (See accompanying -// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - -#pragma once - -#include -#include -// -#include -#include -#include -// -#include -#include -#include -#include -// -namespace hpx { namespace parcelset { namespace policies { namespace libfabric { - // this class looks like a vector, but can be initialized from a pointer and size, - // it is used by the verbs parcelport to pass an rdma memory chunk with received - // data into the decode parcel buffer routines. - // it cannot be resized or changed once created and does not delete wrapped memory - template - class pinned_memory_vector - { - public: - typedef T value_type; - typedef value_type& reference; - typedef const value_type& const_reference; - typedef T* iterator; - typedef T const* const_iterator; - typedef typename std::vector::difference_type difference_type; - typedef typename std::vector::size_type size_type; - // - typedef Allocator allocator_type; - typedef Region region_type; - - typedef pinned_memory_vector - vector_type; - - typedef hpx::function deleter_callback; - - // internal vars - T* m_array_; - std::size_t m_size_; - deleter_callback m_cb_; - allocator_type* m_alloc_; - region_type* m_region_; - - // construct with a memory pool allocator - explicit pinned_memory_vector(allocator_type* alloc) - : m_array_(0) - , m_size_(0) - , m_cb_(0) - , m_alloc_(alloc) - , m_region_(0) - { - LOG_DEBUG_MSG("pinned_memory_vector alloc " - << "size " << hexuint32(m_size_) << "array " - << hexpointer(m_array_) << "region " << hexpointer(m_region_) - << "alloc " << hexpointer(m_alloc_)); - } - - // construct from existing memory chunk, provide allocator, deleter etc - pinned_memory_vector(T* p, std::size_t s, deleter_callback cb, - allocator_type* alloc, region_type* r) - : m_array_(p) - , m_size_(s) - , m_cb_(cb) - , m_alloc_(alloc) - , m_region_(r) - { - LOG_DEBUG_MSG("pinned_memory_vector exist " - << "size " << hexuint32(m_size_) << "array " - << hexpointer(m_array_) << "region " << hexpointer(m_region_) - << "alloc " << hexpointer(m_alloc_)); - } - - // move constructor, - pinned_memory_vector(vector_type&& other) - : m_array_(other.m_array_) - , m_size_(other.m_size_) - , m_cb_(HPX_MOVE(other.m_cb_)) - , m_alloc_(other.m_alloc_) - , m_region_(other.m_region_) - { - LOG_DEBUG_MSG("pinned_memory_vector moved " - << "size " << hexuint32(m_size_) << "array " - << hexpointer(m_array_) << "region " << hexpointer(m_region_) - << "alloc " << hexpointer(m_alloc_)); - other.m_size_ = 0; - other.m_array_ = 0; - other.m_cb_ = nullptr; - other.m_alloc_ = nullptr; - other.m_region_ = nullptr; - } - - ~pinned_memory_vector() - { - if (m_array_ && m_cb_) - { - LOG_DEBUG_MSG("pinned_memory_vector delete " - << "size " << hexuint32(m_size_) << "array " - << hexpointer(m_array_) << "region " - << hexpointer(m_region_) << "alloc " - << hexpointer(m_alloc_)); - m_cb_(); - } - } - - // move copy operator - vector_type& operator=(vector_type&& other) - { - m_array_ = other.m_array_; - m_size_ = other.m_size_; - m_cb_ = other.m_cb_; - m_alloc_ = other.m_alloc_; - m_region_ = other.m_region_; - LOG_DEBUG_MSG("pinned_memory_vector assigned/moved " - << "size " << hexuint32(m_size_) << "array " - << hexpointer(m_array_) << "region " << hexpointer(m_region_) - << "alloc " << hexpointer(m_alloc_)); - other.m_size_ = 0; - other.m_array_ = 0; - other.m_cb_ = nullptr; - other.m_alloc_ = nullptr; - other.m_region_ = nullptr; - return *this; - } - - size_type size() const - { - return m_size_; - } - - size_type max_size() const - { - return m_size_; - } - - bool empty() const - { - return m_array_ == nullptr; - } - - T* data() - { - return m_array_; - } - - iterator begin() - { - return iterator(&m_array_[0]); - } - - iterator end() - { - return iterator(&m_array_[m_size_]); - } - - const_iterator begin() const - { - return iterator(&m_array_[0]); - } - - const_iterator end() const - { - return iterator(&m_array_[m_size_]); - } - - reference operator[](std::size_t index) - { - return m_array_[index]; - } - const_reference operator[](std::size_t index) const - { - return m_array_[index]; - } - - void push_back(const T& _Val) {} - - std::size_t capacity() - { - return m_region_ ? m_region_->get_size() : 0; - } - - inline void resize(std::size_t s) - { - LOG_TRACE_MSG("pinned_memory_vector " - << "size " << hexuint32(m_size_) << "array " - << hexpointer(m_array_) << "region " << hexpointer(m_region_) - << "alloc " << hexpointer(m_alloc_) << "resizing from " - << m_size_ << " to " << s); - - if (m_region_) - { - if (s > m_region_->get_size()) - { - LOG_ERROR_MSG("Resizing from " << m_region_->get_size() - << " to " << s); - throw std::runtime_error( - "pinned_memory_vector should never be resized once an " - "allocation has been assigned"); - } - m_size_ = s; - } - else - { - m_region_ = m_alloc_->allocate_region(s); - m_array_ = static_cast(m_region_->get_address()); - m_size_ = s; - } - } - - void reserve(std::size_t s) - { - LOG_DEBUG_MSG("pinned_memory_vector " - << "size " << hexuint32(m_size_) << "array " - << hexpointer(m_array_) << "region " << hexpointer(m_region_) - << "alloc " << hexpointer(m_alloc_) << "reserving from " - << m_size_ << " to " << s); - if (m_array_ || m_region_) - { - throw std::runtime_error( - "pinned_memory_vector should never be resized once an " - "allocation has been assigned"); - } - m_region_ = m_alloc_->allocate_region(s); - m_array_ = static_cast(m_region_->get_address()); - } - - private: - pinned_memory_vector(vector_type const& other); - }; -}}}} // namespace hpx::parcelset::policies::libfabric diff --git a/libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/rdma_locks.hpp b/libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/rdma_locks.hpp deleted file mode 100644 index aeb0cb05a862..000000000000 --- a/libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/rdma_locks.hpp +++ /dev/null @@ -1,58 +0,0 @@ -// Copyright (C) 2016 John Biddiscombe -// -// SPDX-License-Identifier: BSL-1.0 -// Distributed under the Boost Software License, Version 1.0. (See -// accompanying file LICENSE_1_0.txt or copy at - -#pragma once - -#include -#include - -#include - -namespace hpx { namespace parcelset { namespace policies { namespace libfabric { -#ifdef HPX_PARCELPORT_LIBFABRIC_DEBUG_LOCKS - template - struct scoped_lock : std::lock_guard - { - scoped_lock(Mutex& m) - : std::lock_guard(m) - { - LOG_DEBUG_MSG("Creating scoped_lock RAII"); - } - - ~scoped_lock() - { - LOG_DEBUG_MSG("Destroying scoped_lock RAII"); - } - }; - - template - struct unique_lock : std::unique_lock - { - unique_lock(Mutex& m) - : std::unique_lock(m) - { - LOG_DEBUG_MSG("Creating unique_lock RAII"); - } - - unique_lock(Mutex& m, std::try_to_lock_t t) - : std::unique_lock(m, t) - { - LOG_DEBUG_MSG("Creating unique_lock try_to_lock_t RAII"); - } - - ~unique_lock() - { - LOG_DEBUG_MSG("Destroying unique_lock RAII"); - } - }; -#else - template - using scoped_lock = std::lock_guard; - - template - using unique_lock = std::unique_lock; -#endif -}}}} // namespace hpx::parcelset::policies::libfabric diff --git a/libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/readers_writers_mutex.hpp b/libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/readers_writers_mutex.hpp deleted file mode 100644 index 4eefe14ccb30..000000000000 --- a/libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/readers_writers_mutex.hpp +++ /dev/null @@ -1,318 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2016 John Biddiscombe -// Copyright (c) 2014-2016 MongoDB, Inc. -// Copyright (c) 2008-2014 WiredTiger, Inc. -// -// SPDX-License-Identifier: BSL-1.0 -// Distributed under the Boost Software License, Version 1.0. (See accompanying -// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) -/////////////////////////////////////////////////////////////////////////////// - -#pragma once - -#include -#include - -#include -#include - -#ifdef RW_LOCK_ENABLE_LOGGING -#define RWL_DEBUG_MSG(x) LOG_DEBUG_MSG(x) -#else -#define RWL_DEBUG_MSG(x) -#endif - -// Note that this implementation uses 16bit counters so can handle 65536 -// contentions on the lock without wraparound. It has not proven to be a -// problem so far. (c.f. original description below) - -/* - * Based on "Spinlocks and Read-Write Locks" by Dr. Steven Fuerst: - * http://locklessinc.com/articles/locks/ - * - * Dr. Fuerst further credits: - * There exists a form of the ticket lock that is designed for read-write - * locks. An example written in assembly was posted to the Linux kernel mailing - * list in 2002 by David Howells from RedHat. This was a highly optimized - * version of a read-write ticket lock developed at IBM in the early 90's by - * Joseph Seigh. Note that a similar (but not identical) algorithm was published - * by John Mellor-Crummey and Michael Scott in their landmark paper "Scalable - * Reader-Writer Synchronization for Shared-Memory Multiprocessors". - * - * The following is an explanation of this code. First, the underlying lock - * structure. - * - * struct { - * uint16_t writers; Now serving for writers - * uint16_t readers; Now serving for readers - * uint16_t next; Next available ticket number - * uint16_t __notused; Padding - * } - * - * First, imagine a store's 'take a number' ticket algorithm. A customer takes - * a unique ticket number and customers are served in ticket order. In the data - * structure, 'writers' is the next writer to be served, 'readers' is the next - * reader to be served, and 'next' is the next available ticket number. - * - * Next, consider exclusive (write) locks. The 'now serving' number for writers - * is 'writers'. To lock, 'take a number' and wait until that number is being - * served; more specifically, atomically copy and increment the current value of - * 'next', and then wait until 'writers' equals that copied number. - * - * Shared (read) locks are similar. Like writers, readers atomically get the - * next number available. However, instead of waiting for 'writers' to equal - * their number, they wait for 'readers' to equal their number. - * - * This has the effect of queuing lock requests in the order they arrive - * (incidentally avoiding starvation). - * - * Each lock/unlock pair requires incrementing both 'readers' and 'writers'. - * In the case of a reader, the 'readers' increment happens when the reader - * acquires the lock (to allow read-lock sharing), and the 'writers' increment - * happens when the reader releases the lock. In the case of a writer, both - * 'readers' and 'writers' are incremented when the writer releases the lock. - * - * For example, consider the following read (R) and write (W) lock requests: - * - * writers readers next - * 0 0 0 - * R: ticket 0, readers match OK 0 1 1 - * R: ticket 1, readers match OK 0 2 2 - * R: ticket 2, readers match OK 0 3 3 - * W: ticket 3, writers no match block 0 3 4 - * R: ticket 2, unlock 1 3 4 - * R: ticket 0, unlock 2 3 4 - * R: ticket 1, unlock 3 3 4 - * W: ticket 3, writers match OK 3 3 4 - * - * Note the writer blocks until 'writers' equals its ticket number and it does - * not matter if readers unlock in order or not. - * - * Readers or writers entering the system after the write lock is queued block, - * and the next ticket holder (reader or writer) will unblock when the writer - * unlocks. An example, continuing from the last line of the above example: - * - * writers readers next - * W: ticket 3, writers match OK 3 3 4 - * R: ticket 4, readers no match block 3 3 5 - * R: ticket 5, readers no match block 3 3 6 - * W: ticket 6, writers no match block 3 3 7 - * W: ticket 3, unlock 4 4 7 - * R: ticket 4, readers match OK 4 5 7 - * R: ticket 5, readers match OK 4 6 7 - * - * The 'next' field is a 2-byte value so the available ticket number wraps at - * 64K requests. If a thread's lock request is not granted until the 'next' - * field cycles and the same ticket is taken by another thread, we could grant - * a lock to two separate threads at the same time, and bad things happen: two - * writer threads or a reader thread and a writer thread would run in parallel, - * and lock waiters could be skipped if the unlocks race. This is unlikely, it - * only happens if a lock request is blocked by 64K other requests. The fix is - * to grow the lock structure fields, but the largest atomic instruction we have - * is 8 bytes, the structure has no room to grow. - */ -// from http://locklessinc.com/articles/locks/ - -#define atomic_xadd(P, V) __sync_fetch_and_add((P), (V)) -#define cmpxchg(P, O, N) __sync_bool_compare_and_swap((P), (O), (N)) -#define atomic_inc(P) __sync_add_and_fetch((P), 1) - -// Compiler instruction reordering rwl_barrier_ -#define rwl_barrier_() asm volatile("" : : : "memory") - -#if defined(HPX_GCC_VERSION) -#pragma GCC push_options -#pragma GCC optimize("O0") -#endif - -namespace hpx { namespace lcos { namespace local { - - class readers_writer_mutex - { - private: - typedef union - { - uint64_t u; - struct - { - uint32_t wr; // Writers and readers - } i; - struct - { - uint16_t writers; // Now serving for writers - uint16_t readers; // Now serving for readers - uint16_t next; // Next available ticket number - uint16_t notused; // Padding to 8 bytes - } s; - } readwrite_ticket; - - readwrite_ticket ticket; - bool readlock_; - - public: - readers_writer_mutex() - : ticket{0} - , readlock_(false) - { - } - - // - // acquire lock for a unique writer - // - void lock() - { - RWL_DEBUG_MSG("lock wr " - << std::hex << this << " r " << ticket.s.readers << " w " - << ticket.s.writers << " n " << ticket.s.next << " v " - << ticket.s.next); - - // memory ordering barrier - rwl_barrier_(); - - uint16_t val = atomic_xadd(&ticket.s.next, 1); - while (val != ticket.s.writers) - { - hpx::util::detail::yield_k(4, nullptr); - } - readlock_ = false; - - // memory ordering rwl_barrier_ - rwl_barrier_(); - } - - // - // unlock writer - // - void unlock() - { - // readlock incremented readers when it took the lock - if (readlock_) - { - atomic_inc(&ticket.s.writers); - RWL_DEBUG_MSG("unlock rd " - << std::hex << this << " r " << ticket.s.readers << " w " - << ticket.s.writers << " n " << ticket.s.next); - } - else - { - // only one writer can enter unlock at a time, do not need atomics - readwrite_ticket new_ticket = ticket; - ++new_ticket.s.writers; - ++new_ticket.s.readers; - RWL_DEBUG_MSG("unlock wr " << std::hex << this << " r " - << new_ticket.s.readers << " w " - << new_ticket.s.writers << " n " - << new_ticket.s.next); - ticket.i.wr = new_ticket.i.wr; - } - } - - // - // try to obtain unique writer lock - // - bool try_lock() - { - readwrite_ticket new_ticket, old_ticket; - new_ticket = old_ticket = ticket; - - /* - * This write lock can only be granted if the lock was last granted to - * a writer and there are no readers or writers blocked on the lock, - * that is, if this thread's ticket would be the next ticket granted. - * Do the cheap test to see if this can possibly succeed (and confirm - * the lock is in the correct state to grant this write lock). - */ - if (old_ticket.s.writers != old_ticket.s.next) - return false; - - // The replacement lock value is a result of allocating a new ticket. - ++new_ticket.s.next; - - bool granted = - (cmpxchg(&ticket.u, old_ticket.u, new_ticket.u) ? true : false); - if (granted) - { - readlock_ = false; - } - return granted; - } - - // - // obtain a reader lock, many readers may have the lock simultaneously - // - void lock_shared() - { - RWL_DEBUG_MSG("lock rd " - << std::hex << this << " r " << ticket.s.readers << " w " - << ticket.s.writers << " n " << ticket.s.next << " v " - << ticket.s.next); - - // memory ordering rwl_barrier_ - rwl_barrier_(); - - uint16_t val = atomic_xadd(&ticket.s.next, 1); - while (val != ticket.s.readers) - { - hpx::util::detail::yield_k(0, nullptr); - } - readlock_ = true; - - // only one writer can lock, so no need for atomic increment - atomic_inc(&ticket.s.readers); - - // memory ordering rwl_barrier_ - rwl_barrier_(); - } - - // - // unlock one reader - // - void unlock_shared() - { - atomic_inc(&ticket.s.writers); - } - - // - // try to obtain a reader lock - // - bool try_lock_shared() - { - readwrite_ticket new_ticket, old_ticket; - new_ticket = old_ticket = ticket; - // - /* - * This read lock can only be granted if the lock was last granted to - * a reader and there are no readers or writers blocked on the lock, - * that is, if this thread's ticket would be the next ticket granted. - * Do the cheap test to see if this can possibly succeed (and confirm - * the lock is in the correct state to grant this read lock). - */ - if (old_ticket.s.readers != new_ticket.s.next) - return false; - // - /* - * The replacement lock value is a result of allocating a new ticket and - * incrementing the reader value to match it. - */ - new_ticket.s.readers = new_ticket.s.next = old_ticket.s.next + 1; - bool granted = - (cmpxchg(&ticket.u, old_ticket.u, new_ticket.u) ? true : false); - if (granted) - { - readlock_ = true; - } - return granted; - } - - // return true if a reader or writer has the lock - bool owns_lock() - { - return ((ticket.s.writers != ticket.s.next) || - (ticket.s.readers != ticket.s.next)); - } - }; -}}} // namespace hpx::lcos::local - -#if defined(HPX_GCC_VERSION) -#pragma GCC pop_options -#endif diff --git a/libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/receiver.hpp b/libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/receiver.hpp deleted file mode 100644 index 7a2d8859dd44..000000000000 --- a/libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/receiver.hpp +++ /dev/null @@ -1,97 +0,0 @@ -// Copyright (c) 2015-2017 John Biddiscombe -// Copyright (c) 2017 Thomas Heller -// -// SPDX-License-Identifier: BSL-1.0 -// Distributed under the Boost Software License, Version 1.0. (See accompanying -// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - -#pragma once - -#include -#include - -#include -#include -#include -#include -#include -#include - -#include - -namespace hpx { namespace parcelset { namespace policies { namespace libfabric { - struct parcelport; - // The receiver is responsible for handling incoming messages. For that purpose, - // it posts receive buffers. Incoming messages can be of two kinds: - // 1) An ACK message which has been sent from an rma_receiver, to signal - // the sender about the successful retrieval of an incoming message. - // 2) An incoming parcel, that consists of an header and an eventually - // piggy backed message. If the message is not piggy backed or zero - // copy RMA chunks need to be read, a rma_receiver is created to - // complete the transfer of the message - struct receiver - { - typedef libfabric_region_provider region_provider; - typedef rma_memory_region region_type; - typedef hpx::detail::small_vector zero_copy_vector; - - // -------------------------------------------------------------------- - // construct receive object - receiver(parcelport* pp, fid_ep* endpoint, - rma_memory_pool& memory_pool); - - // -------------------------------------------------------------------- - // these constructors are provided because hpx::lockfree::stack requires them - // they should not be used - receiver(receiver&& other); - receiver& operator=(receiver&& other); - - // -------------------------------------------------------------------- - // destruct receive object - ~receiver(); - - // -------------------------------------------------------------------- - // A received message is routed by the controller into this function. - // it might be an incoming message or just an ack sent to inform that - // all rdma reads are complete from a previous send operation. - void handle_recv(fi_addr_t const& src_addr, std::uint64_t len); - - // -------------------------------------------------------------------- - // the receiver posts a single receive buffer to the queue, attaching - // itself as the context, so that when a message is received - // the owning receiver is called to handle processing of the buffer - void pre_post_receive(); - - // -------------------------------------------------------------------- - // The cleanup call deletes resources and sums counters from internals - // once cleanup is done, the receiver should not be used, other than - // dumping counters - void cleanup(); - - private: - parcelport* pp_; - fid_ep* endpoint_; - region_type* header_region_; - rma_memory_pool* memory_pool_; - // - friend class libfabric_controller; - // - performance_counter messages_handled_; - performance_counter acks_received_; - // from the internal rma_receivers - performance_counter msg_plain_; - performance_counter msg_rma_; - performance_counter sent_ack_; - performance_counter rma_reads_; - performance_counter recv_deletes_; - // - boost::lockfree::stack, - HPX_PARCELPORT_LIBFABRIC_MAX_PREPOSTS, true> - rma_receivers_; - - typedef hpx::spinlock mutex_type; - mutex_type active_receivers_mtx_; - hpx::lcos::local::detail::condition_variable active_receivers_cv_; - hpx::util::atomic_count active_receivers_; - }; -}}}} // namespace hpx::parcelset::policies::libfabric diff --git a/libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/rma_base.hpp b/libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/rma_base.hpp deleted file mode 100644 index da664642f55f..000000000000 --- a/libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/rma_base.hpp +++ /dev/null @@ -1,26 +0,0 @@ -// Copyright (c) 2015-2017 John Biddiscombe -// Copyright (c) 2017 Thomas Heller -// -// SPDX-License-Identifier: BSL-1.0 -// Distributed under the Boost Software License, Version 1.0. (See accompanying -// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - -#pragma once - -#include - -#include - -namespace hpx { namespace parcelset { namespace policies { namespace libfabric { - struct parcelport; - - struct rma_base - { - rma_base() = default; - virtual ~rma_base() = default; - - // A placeholder to allow sender or rma_received subclasses to gracefully - // handle an error on the network - virtual void handle_error(struct fi_cq_err_entry err) = 0; - }; -}}}} // namespace hpx::parcelset::policies::libfabric diff --git a/libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/rma_memory_pool.hpp b/libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/rma_memory_pool.hpp deleted file mode 100644 index 916901f95a34..000000000000 --- a/libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/rma_memory_pool.hpp +++ /dev/null @@ -1,504 +0,0 @@ -// Copyright (c) 2014-2016 John Biddiscombe -// -// SPDX-License-Identifier: BSL-1.0 -// Distributed under the Boost Software License, Version 1.0. (See accompanying -// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - -#pragma once - -#include -#include -// -#include -#include -#include -#include -// -#include -// -#include -#include -#include -#include -#include -#include -#include -#include -#include - -// the default memory chunk size in bytes -#define RDMA_POOL_1K_CHUNK_SIZE 0x001 * 0x0400 // 1KB -#define RDMA_POOL_SMALL_CHUNK_SIZE 0x010 * 0x0400 // 16KB -#define RDMA_POOL_MEDIUM_CHUNK_SIZE 0x040 * 0x0400 // 64KB -#define RDMA_POOL_LARGE_CHUNK_SIZE 0x400 * 0x0400 // 1MB - -#define RDMA_POOL_MAX_1K_CHUNKS 1024 -#define RDMA_POOL_MAX_SMALL_CHUNKS 2048 -#define RDMA_POOL_MAX_MEDIUM_CHUNKS 128 -#define RDMA_POOL_MAX_LARGE_CHUNKS 16 - -// if the HPX configuration has set a different value, use it -#if defined(HPX_PARCELPORT_LIBFABRIC_MEMORY_CHUNK_SIZE) -#undef RDMA_POOL_SMALL_CHUNK_SIZE -#define RDMA_POOL_SMALL_CHUNK_SIZE HPX_PARCELPORT_LIBFABRIC_MEMORY_CHUNK_SIZE -#endif - -static_assert( - HPX_PARCELPORT_LIBFABRIC_MEMORY_CHUNK_SIZE < RDMA_POOL_MEDIUM_CHUNK_SIZE, - "Default memory Chunk size must be less than medium chunk size"); - -// Description of memory pool objects: -// -// memory_region_allocator: -// An allocator that returns memory of the requested size. The memory is pinned -// and ready to be used for RDMA operations. A memory_region object is -// used, it contains the memory registration information needed by the libfabric API. -// -// rdma_chunk_pool : -// Allocate N chunks of memory in one go, a single memory registration is -// used for the whole block, and this is divided into smaller blocks that -// are then used by the pool_container. -// @TODO: Note that originally a boost::pool was used but this was replaced by -// a simple block allocation and needs to be cleaned as it puts blocks onto -// a stack that is duplicated in pool_container. -// -// pool_container: -// The pool container wraps an rdma_chunk_pool and provides a stack. When a user -// requests a small block, one is popped off the stack. At startup, the pool_container -// requests a large number of blocks from the rdma_chunk_pool and sets the correct -// address offset within each larger chunk for each small block and pushes the mini -// Region onto the stack. Thus N small rdma_regions are created from a -// single larger one and memory blocks come from contiguous memory. -// -// rma_memory_pool: -// The rma_memory_pool maintains 4 pool_container (stacks) of different sized blocks -// so that most user requests can be fulfilled. -// If a request cannot be filled, the pool can generate temporary blocks with -// new allocations and on-the-fly registration of the memory. -// Additionally, it also provides a simple API so users may pass pre-allocated -// memory to the pool for on-the-fly registration (rdma transfer of user memory chunks) -// and later de-registration. - -namespace hpx { namespace parcelset { - - namespace bl = hpx::lockfree; - - // A simple tag type we use for logging assistance (identification) - struct pool_tiny - { - static const char* desc() - { - return "Tiny "; - } - }; - struct pool_small - { - static const char* desc() - { - return "Small "; - } - }; - struct pool_medium - { - static const char* desc() - { - return "Medium "; - } - }; - struct pool_large - { - static const char* desc() - { - return "Large "; - } - }; - - // -------------------------------------------------------------------- - // allocator for memory_regions - template - struct memory_region_allocator - { - typedef typename RegionProvider::provider_domain domain_type; - typedef rma_memory_region region_type; - typedef std::shared_ptr region_ptr; - - // default empty constructor - memory_region_allocator() {} - - // allocate a registered memory region - static region_ptr malloc(domain_type* pd, const std::size_t bytes) - { - region_ptr region = std::make_shared(); - LOG_DEBUG_MSG( - "Allocating " << hexuint32(bytes) << "using chunk mallocator"); - region->allocate(pd, bytes); - return region; - } - - // release a registered memory region - static void free(region_ptr region) - { - LOG_DEBUG_MSG("Freeing a block from chunk mallocator (ref count) " - << region.use_count()); - region.reset(); - } - }; - - // --------------------------------------------------------------------------- - // pool_container, collect some routines for reuse with - // small, medium, large chunks etc - // --------------------------------------------------------------------------- - template - struct pool_container - { - typedef typename RegionProvider::provider_domain domain_type; - typedef rma_memory_region region_type; - typedef std::shared_ptr region_ptr; - - // ------------------------------------------------------------------------ - pool_container(domain_type* pd) - : accesses_(0) - , in_use_(0) - , pd_(pd) - { - } - - // ------------------------------------------------------------------------ - bool allocate_pool() - { - LOG_DEBUG_MSG(PoolType::desc() - << "Allocating " - << "ChunkSize " << hexuint32(ChunkSize) << "num_chunks " - << decnumber(MaxChunks) << "total " - << hexuint32(ChunkSize * MaxChunks)); - - // Allocate one very large registered block for N small blocks - region_ptr block = Allocator().malloc(pd_, ChunkSize * MaxChunks); - // store a copy of this to make sure it is 'alive' - block_list_[block->get_address()] = block; - - // break the large region into N small regions - uint64_t offset = 0; - for (std::size_t i = 0; i < MaxChunks; ++i) - { - // we must keep a copy of the sub-region since we only pass - // pointers to regions around the code. - region_list_[i] = region_type(block->get_region(), - static_cast(block->get_base_address()) + offset, - static_cast(block->get_base_address()), ChunkSize, - region_type::BLOCK_PARTIAL); - LOG_TRACE_MSG(PoolType::desc() - << "Allocate Block " << decnumber(i) << region_list_[i]); - // push the pointer onto our stack - push(®ion_list_[i]); - offset += ChunkSize; - } - in_use_ = 0; - return true; - } - - // ------------------------------------------------------------------------ - void DeallocatePool() - { - if (in_use_ != 0) - { - LOG_ERROR_MSG(PoolType::desc() - << "Deallocating free_list : Not all blocks were returned " - << " refcounts " << decnumber(in_use_)); - } - region_type* region = nullptr; - while (!free_list_.pop(region)) - { - // clear our stack - delete region; - } - // wipe our copies of sub-regions (no clear function for std::array) - std::fill(region_list_.begin(), region_list_.end(), region_type()); - // release references to shared arrays - block_list_.clear(); - } - - // ------------------------------------------------------------------------ - inline void push(region_type* region) - { - LOG_TRACE_MSG(PoolType::desc() - << "Push block " << *region << "Used " << decnumber(in_use_ - 1) - << "Accesses " << decnumber(accesses_)); - - LOG_EXCLUSIVE( - uintptr_t val = uintptr_t(region->get_address()); LOG_TRACE_MSG( - PoolType::desc() << "Writing 0xdeadbeef to region address " - << hexpointer(val)); - if (region->get_address() != nullptr) { - // get use the pointer to the region - uintptr_t* ptr = reinterpret_cast(val); - for (unsigned int c = 0; c < ChunkSize / 8; ++c) - { - ptr[c] = 0xdeadbeef; - } - }); - - if (!free_list_.push(region)) - { - LOG_ERROR_MSG(PoolType::desc() << "Error in memory pool push"); - } - // decrement one reference - --in_use_; - } - - // ------------------------------------------------------------------------ - inline region_type* pop() - { - // get a block - region_type* region = nullptr; - if (!free_list_.pop(region)) - { - LOG_DEBUG_MSG(PoolType::desc() << "Error in memory pool pop"); - return nullptr; - } - ++in_use_; - ++accesses_; - LOG_TRACE_MSG(PoolType::desc() - << "Pop block " << *region << "Used " << decnumber(in_use_) - << "Accesses " << decnumber(accesses_)); - return region; - } - - // ------------------------------------------------------------------------ - // at shutdown we can disregard any bocks still prepoted as we can't - // unpost them - void decrement_used_count(uint32_t N) - { - in_use_ -= N; - } - - // ------------------------------------------------------------------------ - // for debug log messages - std::string status() - { - std::stringstream temp; - temp << "| " << PoolType::desc() << "ChunkSize " - << hexlength(ChunkSize) << "Free " - << decnumber(MaxChunks - in_use_) << "Used " - << decnumber(in_use_) << "Accesses " << decnumber(accesses_); - return temp.str(); - } - - // ------------------------------------------------------------------------ - constexpr std::size_t chunk_size() const - { - return ChunkSize; - } - // - performance_counter accesses_; - performance_counter in_use_; - // - domain_type* pd_; - std::unordered_map block_list_; - std::array region_list_; - bl::stack> free_list_; - }; - - // --------------------------------------------------------------------------- - // memory pool, holds 4 smaller pools and pops/pushes to the one - // of the right size for the requested data - // --------------------------------------------------------------------------- - template - struct rma_memory_pool - { - HPX_NON_COPYABLE(rma_memory_pool); - - typedef typename RegionProvider::provider_domain domain_type; - typedef rma_memory_region region_type; - typedef memory_region_allocator allocator_type; - typedef std::shared_ptr region_ptr; - - //---------------------------------------------------------------------------- - // constructor - rma_memory_pool(domain_type* pd) - : protection_domain_(pd) - , tiny_(pd) - , small_(pd) - , medium_(pd) - , large_(pd) - , temp_regions(0) - , user_regions(0) - { - tiny_.allocate_pool(); - small_.allocate_pool(); - medium_.allocate_pool(); - large_.allocate_pool(); - LOG_DEBUG_MSG("Completed memory_pool initialization"); - } - - //---------------------------------------------------------------------------- - // destructor - ~rma_memory_pool() - { - deallocate_pools(); - } - - //---------------------------------------------------------------------------- - void deallocate_pools() - { - tiny_.DeallocatePool(); - small_.DeallocatePool(); - medium_.DeallocatePool(); - large_.DeallocatePool(); - } - - // ------------------------- - // User allocation interface - // ------------------------- - // The Region* versions of allocate/deallocate - // should be used in preference to the std:: compatible - // versions using char* for efficiency - - //---------------------------------------------------------------------------- - // query the pool for a chunk of a given size to see if one is available - // this function is 'unsafe' because it is not thread safe and another - // thread may push/pop a block after/during this call and invalidate the result. - inline bool can_allocate_unsafe(size_t length) const - { - if (length <= tiny_.chunk_size()) - { - return !tiny_.free_list_.empty(); - } - else if (length <= small_.chunk_size()) - { - return !small_.free_list_.empty(); - } - else if (length <= medium_.chunk_size()) - { - return !medium_.free_list_.empty(); - } - else if (length <= large_.chunk_size()) - { - return !large_.free_list_.empty(); - } - return true; - } - - //---------------------------------------------------------------------------- - // allocate a region, if size=0 a tiny region is returned - inline region_type* allocate_region(size_t length) - { - region_type* region = nullptr; - // - if (length <= tiny_.chunk_size()) - { - region = tiny_.pop(); - } - else if (length <= small_.chunk_size()) - { - region = small_.pop(); - } - else if (length <= medium_.chunk_size()) - { - region = medium_.pop(); - } - else if (length <= large_.chunk_size()) - { - region = large_.pop(); - } - // if we didn't get a block from the cache, create one on the fly - if (region == nullptr) - { - region = allocate_temporary_region(length); - } - - LOG_TRACE_MSG("Popping Block " - << *region << tiny_.status() << small_.status() - << medium_.status() << large_.status() << large_.status() - << "temp regions " << decnumber(temp_regions)); - // - return region; - } - - //---------------------------------------------------------------------------- - // release a region back to the pool - inline void deallocate(region_type* region) - { - // if this region was registered on the fly, then don't return it to the pool - if (region->get_temp_region() || region->get_user_region()) - { - if (region->get_temp_region()) - { - --temp_regions; - LOG_TRACE_MSG("Deallocating temp region " - << *region << "temp regions " - << decnumber(temp_regions)); - } - else if (region->get_user_region()) - { - --user_regions; - LOG_TRACE_MSG("Deleting (user region) " - << *region << "user regions " - << decnumber(user_regions)); - } - delete region; - return; - } - - // put the block back on the free list - if (region->get_size() <= tiny_.chunk_size()) - { - tiny_.push(region); - } - else if (region->get_size() <= small_.chunk_size()) - { - small_.push(region); - } - else if (region->get_size() <= medium_.chunk_size()) - { - medium_.push(region); - } - else if (region->get_size() <= large_.chunk_size()) - { - large_.push(region); - } - - LOG_TRACE_MSG("Pushing Block " - << *region << tiny_.status() << small_.status() - << medium_.status() << large_.status() << "temp regions " - << decnumber(temp_regions)); - } - - //---------------------------------------------------------------------------- - // allocates a region from the heap and registers it, it bypasses the pool - // when deallocted, it will be unregistered and deleted, not returned to the pool - inline region_type* allocate_temporary_region(std::size_t length) - { - region_type* region = new region_type(); - region->set_temp_region(); - region->allocate(protection_domain_, length); - ++temp_regions; - LOG_TRACE_MSG("Allocating temp region " - << *region << "temp regions " << decnumber(temp_regions)); - return region; - } - - //---------------------------------------------------------------------------- - // protection domain that memory is registered with - domain_type* protection_domain_; - - // maintain 4 pools of thread safe pre-allocated regions of fixed size. - pool_container - tiny_; - pool_container - small_; - pool_container - medium_; - pool_container - large_; - - // counters - hpx::util::atomic_count temp_regions; - hpx::util::atomic_count user_regions; - }; -}} // namespace hpx::parcelset diff --git a/libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/rma_memory_region.hpp b/libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/rma_memory_region.hpp deleted file mode 100644 index 49ee09e49496..000000000000 --- a/libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/rma_memory_region.hpp +++ /dev/null @@ -1,324 +0,0 @@ -// Copyright (c) 2015-2016 John Biddiscombe -// -// SPDX-License-Identifier: BSL-1.0 -// Distributed under the Boost Software License, Version 1.0. (See accompanying -// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - -#pragma once - -#include -#include -#include -// -#include -// - -namespace hpx { namespace parcelset { - // -------------------------------------------------------------------- - // a base class that provides an API for creating/accessing - // pinned memory blocks. This will be overridden by concrete - // implementations for each parcelport. - struct rma_memory_base - { - rma_memory_base(); - }; - - // -------------------------------------------------------------------- - template - struct rma_memory_region - { - typedef typename RegionProvider::provider_domain provider_domain; - typedef typename RegionProvider::provider_region provider_region; - - // -------------------------------------------------------------------- - rma_memory_region() - : region_(nullptr) - , address_(nullptr) - , base_addr_(nullptr) - , size_(0) - , used_space_(0) - , flags_(0) - { - } - - // -------------------------------------------------------------------- - rma_memory_region(provider_region* region, char* address, - char* base_address, uint64_t size, uint32_t flags) - : region_(region) - , address_(address) - , base_addr_(base_address) - , size_(size) - , used_space_(0) - , flags_(flags) - { - } - - // -------------------------------------------------------------------- - // construct a memory region object by registering an existing address buffer - rma_memory_region( - provider_domain* pd, const void* buffer, const uint64_t length) - { - address_ = static_cast(const_cast(buffer)); - base_addr_ = address_; - size_ = length; - used_space_ = length; - flags_ = BLOCK_USER; - - int ret = traits::rma_memory_region_traits< - RegionProvider>::register_memory(pd, const_cast(buffer), - length, - traits::rma_memory_region_traits::flags(), 0, - (uint64_t) address_, 0, &(region_), nullptr); - - if (ret) - { - LOG_ERROR_MSG("error registering region " << hexpointer(buffer) - << hexlength(length)); - throw std::runtime_error("error in memory registration"); - } - else - { - LOG_DEBUG_MSG("OK registering region " - << hexpointer(buffer) << hexpointer(address_) << "desc " - << hexpointer(fi_mr_desc(region_)) << "rkey " - << hexpointer(fi_mr_key(region_)) << "length " - << hexlength(size_)); - } - } - - // -------------------------------------------------------------------- - // allocate a block of size length and register it - int allocate(provider_domain* pd, uint64_t length) - { - // Allocate storage for the memory region. - void* buffer = new char[length]; - if (buffer != nullptr) - { - LOG_DEBUG_MSG( - "allocated storage for memory region with malloc OK " - << hexnumber(length)); - } - address_ = static_cast(buffer); - base_addr_ = static_cast(buffer); - size_ = length; - used_space_ = 0; - - int ret = traits::rma_memory_region_traits< - RegionProvider>::register_memory(pd, const_cast(buffer), - length, - traits::rma_memory_region_traits::flags(), 0, - (uint64_t) address_, 0, &(region_), nullptr); - - if (ret) - { - LOG_ERROR_MSG("error registering region " << hexpointer(buffer) - << hexlength(length)); - throw std::runtime_error("error in memory registration"); - } - else - { - LOG_DEBUG_MSG("OK registering region " - << hexpointer(buffer) << hexpointer(address_) << "desc " - << hexpointer(fi_mr_desc(region_)) << "rkey " - << hexpointer(fi_mr_key(region_)) << "length " - << hexlength(size_)); - } - - LOG_DEBUG_MSG("allocated/registered memory region " - << hexpointer(this) << "with desc " << hexnumber(get_desc()) - << "at address " << hexpointer(get_address()) << "with length " - << hexlength(get_size())); - return 0; - } - - // -------------------------------------------------------------------- - // destroy the region and memory according to flag settings - ~rma_memory_region() - { - if (get_partial_region()) - return; - release(); - } - - // -------------------------------------------------------------------- - // Deregister and free the memory region. - // returns 0 when successful, -1 otherwise - int release(void) - { - if (region_ != nullptr) - { - LOG_TRACE_MSG("About to release memory region with desc " - << hexpointer(get_desc())); - // get these before deleting/unregistering (for logging) - const void* buffer = get_base_address(); - LOG_EXCLUSIVE(uint32_t length = get_size();); - // - if (traits::rma_memory_region_traits< - RegionProvider>::unregister_memory(region_)) - { - LOG_ERROR_MSG("Error, fi_close mr failed\n"); - return -1; - } - else - { - LOG_DEBUG_MSG("deregistered memory region with desc " - << hexpointer(get_desc()) << "at address " - << hexpointer(buffer) << "with length " - << hexlength(length)); - } - if (!get_user_region()) - { - delete[](static_cast(buffer)); - } - region_ = nullptr; - } - return 0; - } - - // -------------------------------------------------------------------- - // return the address of this memory region block. If this - // is a partial region, then the address will be offset from the - // base address - inline char* get_address(void) const - { - return address_; - } - - // -------------------------------------------------------------------- - // Get the address of the base memory region. - // This is the address of the memory allocated from the system - inline char* get_base_address(void) const - { - return base_addr_; - } - - // -------------------------------------------------------------------- - // Get the size of the memory chunk usable by this memory region, - // this may be smaller than the value returned by get_length - // if the region is a sub region (partial region) within another block - inline uint64_t get_size(void) const - { - return size_; - } - - // -------------------------------------------------------------------- - // Get the local descriptor of the memory region. - inline void* get_desc(void) const - { - return fi_mr_desc(region_); - } - - // -------------------------------------------------------------------- - // Get the remote key of the memory region. - inline uint64_t get_remote_key(void) const - { - return fi_mr_key(region_); - } - - // -------------------------------------------------------------------- - // Set the size used by a message in the memory region. - inline void set_message_length(uint32_t length) - { - used_space_ = length; - } - - // -------------------------------------------------------------------- - // Get the size used by a message in the memory region. - inline uint32_t get_message_length(void) const - { - return used_space_; - } - - // -------------------------------------------------------------------- - // return the underlying infiniband region handle - inline struct fid_mr* get_region() - { - return region_; - } - - // -------------------------------------------------------------------- - // flags used for management of lifetime - enum - { - BLOCK_USER = 1, - BLOCK_TEMP = 2, - BLOCK_PARTIAL = 4, - }; - - // -------------------------------------------------------------------- - // A user allocated region use memory allocated by the user. - // on destruction, the memory is unregistered, but not deleted - inline void set_user_region() - { - flags_ |= BLOCK_USER; - } - - inline bool get_user_region() const - { - return (flags_ & BLOCK_USER) == BLOCK_USER; - } - - // -------------------------------------------------------------------- - // A temp region is one that the memory pool is not managing - // so it is unregistered and deleted when returned to the pool and not reused - inline void set_temp_region() - { - flags_ |= BLOCK_TEMP; - } - - inline bool get_temp_region() const - { - return (flags_ & BLOCK_TEMP) == BLOCK_TEMP; - } - - // -------------------------------------------------------------------- - // a partial region is a subregion of a larger memory region - // on destruction, it is not unregistered or deleted as the 'parent' region - // will delete many partial regions on destruction - inline void set_partial_region() - { - flags_ |= BLOCK_PARTIAL; - } - - inline bool get_partial_region() const - { - return (flags_ & BLOCK_PARTIAL) == BLOCK_PARTIAL; - } - - // -------------------------------------------------------------------- - friend std::ostream& operator<<( - std::ostream& os, rma_memory_region const& region) - { - os << "region " << hexpointer(®ion) << "base address " - << hexpointer(region.base_addr_) << "address " - << hexpointer(region.address_) << "flags " - << hexbyte(region.flags_) << "size " << hexlength(region.size_) - << "used_space_ " << hexlength(region.used_space_); - return os; - } - - private: - // The internal network type dependent memory region handle - provider_region* region_; - - // we may be a piece of a larger region, this gives the start address - // of this piece of the region. This is the address that should be used for data - // storage - char* address_; - - // if we are part of a larger region, this is the base address of - // that larger region - char* base_addr_; - - // The size of the memory buffer, if this is a partial region - // it will be smaller than the value returned by region_->length - uint64_t size_; - - // space used by a message in the memory region. - uint64_t used_space_; - - // flags to control lifetime of blocks - uint32_t flags_; - }; - -}} // namespace hpx::parcelset diff --git a/libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/rma_memory_region_traits.hpp b/libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/rma_memory_region_traits.hpp deleted file mode 100644 index b2584280b570..000000000000 --- a/libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/rma_memory_region_traits.hpp +++ /dev/null @@ -1,40 +0,0 @@ -// Copyright (c) 2017 John Biddiscombe -// -// SPDX-License-Identifier: BSL-1.0 -// Distributed under the Boost Software License, Version 1.0. (See accompanying -// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - -#pragma once - -#include - -#include -// -namespace hpx { namespace traits { - - template - struct rma_memory_region_traits - { - typedef typename RegionProvider::provider_domain provider_domain; - typedef typename RegionProvider::provider_region provider_region; - // - static int register_memory(provider_domain* pd, const void* buf, - size_t len, uint64_t access, uint64_t offset, - uint64_t requested_key, uint64_t flags, provider_region** mr, - void* context) - { - return RegionProvider::register_memory(pd, buf, len, access, offset, - requested_key, flags, mr, context); - } - // - static int unregister_memory(provider_region* mr) - { - return RegionProvider::unregister_memory(mr); - } - // - static int flags() - { - return RegionProvider::flags(); - } - }; -}} // namespace hpx::traits diff --git a/libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/rma_receiver.hpp b/libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/rma_receiver.hpp deleted file mode 100644 index 6462a57d1f46..000000000000 --- a/libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/rma_receiver.hpp +++ /dev/null @@ -1,127 +0,0 @@ -// Copyright (c) 2015-2017 John Biddiscombe -// Copyright (c) 2017 Thomas Heller -// -// SPDX-License-Identifier: BSL-1.0 -// Distributed under the Boost Software License, Version 1.0. (See accompanying -// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - -#pragma once - -#include -#include - -#include -#include -#include -#include -#include -#include - -#include - -namespace hpx { namespace parcelset { namespace policies { namespace libfabric { - struct parcelport; - - // The rma_receiver is responsible for receiving the - // missing chunks of the message: - // 1) Non-piggy backed non-zero copy chunks (if existing) - // 2) The zero copy chunks from serialization - struct rma_receiver : public rma_base - { - typedef libfabric_region_provider region_provider; - typedef rma_memory_region region_type; - typedef rma_memory_pool memory_pool_type; - typedef hpx::detail::small_vector zero_copy_vector; - - typedef header - header_type; - static constexpr unsigned int header_size = - header_type::header_block_size; - - typedef serialization::serialization_chunk chunk_struct; - typedef hpx::function completion_handler; - - rma_receiver(parcelport* pp, fid_ep* endpoint, - memory_pool_type* memory_pool, completion_handler&& handler); - - ~rma_receiver(); - - // -------------------------------------------------------------------- - // the main entry point when a message is received, this function - // will dispatch to either read with or without rma depending on - // whether there are zero copy chunks to handle - void read_message(region_type* region, fi_addr_t const& src_addr); - - // -------------------------------------------------------------------- - // Process a message that has no zero copy chunks - void handle_message_no_rma(); - - // -------------------------------------------------------------------- - // Process a message that has zero copy chunks. for each chunk we - // make an RMA read request - void handle_message_with_zerocopy_rma(); - - // -------------------------------------------------------------------- - // Process a message where the chunk inf0ormation did not fit into - // the header. An extra RMA read of chunk data must be made before - // the chunks can be identified (and possibly retrieved from the remote node) - void handle_message_no_chunk_data(); - - // -------------------------------------------------------------------- - // After remote chunks have been read by rma, process the chunk list - // and initiate further rma reads if necessary - int handle_chunks_read_message(); - - // -------------------------------------------------------------------- - // Each RMA read completion will enter this function and count down until - // all are done, then we can process the parcel and cleanup - void handle_rma_read_completion(); - - // -------------------------------------------------------------------- - // Once all RMA reads are complete, we must send an ack to the origin - // of the parcel so that it can release the RMA regions it is holding onto - void send_rdma_complete_ack(); - - // -------------------------------------------------------------------- - // After message processing is complete, this routine cleans up and resets - void cleanup_receive(); - - // -------------------------------------------------------------------- - void handle_error(struct fi_cq_err_entry err) override; - - // -------------------------------------------------------------------- - // convenience function to execute a read for each zero-copy chunk - // in the chunks_ variable - void read_chunk_list(); - - // -------------------------------------------------------------------- - // convenience function to execute a read, given the right params - void read_one_chunk(fi_addr_t src_addr, region_type* get_region, - const void* remoteAddr, uint64_t rkey); - - private: - parcelport* pp_; - fid_ep* endpoint_; - region_type* header_region_; - region_type* chunk_region_; - region_type* message_region_; - header_type* header_; - std::vector chunks_; - zero_copy_vector rma_regions_; - rma_memory_pool* memory_pool_; - fi_addr_t src_addr_; - completion_handler handler_; - hpx::util::atomic_count rma_count_; - bool chunk_fetch_; - - double start_time_; - - // - friend class receiver; - performance_counter msg_plain_; - performance_counter msg_rma_; - performance_counter sent_ack_; - performance_counter rma_reads_; - performance_counter recv_deletes_; - }; -}}}} // namespace hpx::parcelset::policies::libfabric diff --git a/libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/sender.hpp b/libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/sender.hpp deleted file mode 100644 index a9c17dcc65ce..000000000000 --- a/libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/sender.hpp +++ /dev/null @@ -1,147 +0,0 @@ -// Copyright (c) 2015-2017 John Biddiscombe -// Copyright (c) 2017 Thomas Heller -// -// SPDX-License-Identifier: BSL-1.0 -// Distributed under the Boost Software License, Version 1.0. (See accompanying -// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - -#pragma once - -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -// include for iovec -#include - -namespace hpx { namespace parcelset { namespace policies { namespace libfabric { - struct parcelport; - - struct sender : public rma_base - { - typedef libfabric_region_provider region_provider; - typedef rma_memory_region region_type; - typedef rma_memory_pool memory_pool_type; - - typedef header - header_type; - static constexpr unsigned int header_size = - header_type::header_block_size; - - typedef pinned_memory_vector - snd_data_type; - typedef parcel_buffer - snd_buffer_type; - - typedef hpx::detail::small_vector zero_copy_vector; - - // -------------------------------------------------------------------- - sender(parcelport* pp, fid_ep* endpoint, fid_domain* domain, - memory_pool_type* memory_pool) - : parcelport_(pp) - , endpoint_(endpoint) - , domain_(domain) - , memory_pool_(memory_pool) - , buffer_(snd_data_type(memory_pool_), *memory_pool_) - , header_region_(nullptr) - , chunk_region_(nullptr) - , message_region_(nullptr) - , completion_count_(0) - , sends_posted_(0) - , sends_deleted_(0) - , acks_received_(0) - { - // the header region is reused multiple times - header_region_ = memory_pool_->allocate_region( - memory_pool_->small_.chunk_size()); - LOG_DEBUG_MSG("Create sender: " << hexpointer(this)); - } - - // -------------------------------------------------------------------- - ~sender() - { - memory_pool_->deallocate(header_region_); - } - - // -------------------------------------------------------------------- - snd_buffer_type get_new_buffer() - { - LOG_DEBUG_MSG("Returning a new buffer object from sender " - << hexpointer(this)); - return snd_buffer_type(snd_data_type(memory_pool_), *memory_pool_); - } - - // -------------------------------------------------------------------- - // @TODO: unused, but required by the parcelport interface - template - void async_write(Handler&&, ParcelPostprocess&&) - { - HPX_ASSERT(false); - } - - // -------------------------------------------------------------------- - // @TODO: unused, but required by the parcelport interface - void verify_(parcelset::locality const& /*parcel_locality_id*/) const {} - - // -------------------------------------------------------------------- - // The main message send routine : package the header, send it - // with an optional extra message region if it cannot be piggybacked - // send chunk/rma information for all zero copy serialization regions - void async_write_impl(); - - // -------------------------------------------------------------------- - // Called when a send completes - void handle_send_completion(); - - // -------------------------------------------------------------------- - // Triggered when the remote end has finished RMA operations and - // we can release resources - void handle_message_completion_ack(); - - // -------------------------------------------------------------------- - // Cleanup memory regions we are holding onto etc - void cleanup(); - - // -------------------------------------------------------------------- - // if a send completion reports failure, we can retry the send - void handle_error(struct fi_cq_err_entry err) override; - - // -------------------------------------------------------------------- - parcelport* parcelport_; - fid_ep* endpoint_; - fid_domain* domain_; - memory_pool_type* memory_pool_; - fi_addr_t dst_addr_; - snd_buffer_type buffer_; - region_type* header_region_; - region_type* chunk_region_; - region_type* message_region_; - header_type* header_; - zero_copy_vector rma_regions_; - hpx::util::atomic_count completion_count_; - - // principally for debugging - performance_counter sends_posted_; - performance_counter sends_deleted_; - performance_counter acks_received_; - // - hpx::move_only_function handler_; - hpx::function postprocess_handler_; - // - struct iovec region_list_[2]; - void* desc_[2]; - }; -}}}} // namespace hpx::parcelset::policies::libfabric diff --git a/libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/unordered_map.hpp b/libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/unordered_map.hpp deleted file mode 100644 index f9e40052f0ae..000000000000 --- a/libs/full/parcelport_libfabric/include/hpx/parcelport_libfabric/unordered_map.hpp +++ /dev/null @@ -1,386 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2016 John Biddiscombe -// -// SPDX-License-Identifier: BSL-1.0 -// Distributed under the Boost Software License, Version 1.0. (See accompanying -// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) -//////////////////////////////////////////////////////////////////////////////// - -#pragma once - -#include - -#include -#include -#include -// -#include -#include - -// A quick wrapper around an unordered_map with a mutex to ensure two -// threads don't simultaneously write or read during write. -// Warning: not thread safe to use iterators whilst others are changing the map -// obtain a read lock before iterating using the provided mutex and function -// map_type::map_read_lock_type read_lock(map.read_write_mutex()); -// in order to safely iterate over contents and block any writers from gaining access -// -namespace hpx { namespace concurrent { - - template , - class KeyEqual = std::equal_to, - class Allocator = std::allocator>> - class unordered_map - { - public: - typedef hpx::lcos::local::readers_writer_mutex rw_mutex_type; - typedef std::unique_lock write_lock; - typedef std::shared_lock read_lock; - typedef std::defer_lock_t defer_lock; - - private: - typedef std::unordered_map - base_map; - base_map map_; - mutable rw_mutex_type mutex_; - mutable read_lock iterator_lock_; - - public: - typedef typename base_map::key_type key_type; - typedef typename base_map::mapped_type mapped_type; - typedef typename base_map::value_type value_type; - typedef typename base_map::size_type size_type; - typedef typename base_map::difference_type difference_type; - typedef typename base_map::hasher hasher; - typedef typename base_map::key_equal key_equal; - typedef typename base_map::allocator_type allocator_type; - typedef typename base_map::reference reference; - typedef typename base_map::const_reference const_reference; - typedef typename base_map::pointer pointer; - typedef typename base_map::const_pointer const_pointer; - typedef typename base_map::iterator iterator; - typedef typename base_map::const_iterator const_iterator; - typedef typename base_map::local_iterator local_iterator; - typedef typename base_map::const_local_iterator const_local_iterator; - // - typedef read_lock map_read_lock_type; - typedef write_lock map_write_lock_type; - - public: - // - // construct/destroy/copy - // - explicit unordered_map(size_type n = 64, const hasher& hf = hasher(), - const key_equal& eql = key_equal(), - const allocator_type& a = allocator_type()) - : map_(n, hf, eql, a) - , iterator_lock_(mutex_, defer_lock()) - { - } - - template - unordered_map(InputIterator first, InputIterator last, size_type n = 64, - const hasher& hf = hasher(), const key_equal& eql = key_equal(), - const allocator_type& a = allocator_type()) - : map_(first, last, n, hf, eql, a) - , iterator_lock_(mutex_, defer_lock()) - { - } - - unordered_map(const unordered_map& other) - : map_(other) - , mutex_() - , iterator_lock_(mutex_, defer_lock()) - { - } - - explicit unordered_map(const allocator_type& a) - : map_(a) - , iterator_lock_(mutex_, defer_lock()) - { - } - - unordered_map(const unordered_map& other, const allocator_type& a) - : map_(other, a) - , iterator_lock_(mutex_, defer_lock()) - { - } - - // C++11 specific - unordered_map(unordered_map&& other) noexcept - : map_(HPX_FORWARD(unordered_map, other)) - , mutex_() - , iterator_lock_(mutex_, defer_lock()) - { - } - - unordered_map(unordered_map&& other, const allocator_type& a) - : map_(HPX_FORWARD(unordered_map, other), a) - , iterator_lock_(mutex_, defer_lock()) - { - } - - unordered_map(std::initializer_list il, size_type n = 64, - const hasher& hf = hasher(), const key_equal& eql = key_equal(), - const allocator_type& a = allocator_type()) - : map_(il, n, hf, eql, a) - , iterator_lock_(mutex_, defer_lock()) - { - } - - ~unordered_map() - { - write_lock lock(mutex_); - map_.clear(); - }; - - unordered_map& operator=(const unordered_map& other) - { - write_lock lock(other.mutex_); - map_ = other; - return *this; - } - - // C++11 specific - unordered_map& operator=(unordered_map&& other) noexcept - { - write_lock lock(other.mutex_); - mutex_ = rw_mutex_type(); - map_ = HPX_MOVE(other); - return *this; - } - - unordered_map& operator=(std::initializer_list il) - { - write_lock lock(mutex_); - map_ = il; - return *this; - } - - void swap(unordered_map& other) - { - write_lock lock(mutex_); - map_.swap(other); - } - - // - // modifiers - // - std::pair insert(const value_type& x) - { - write_lock lock(mutex_); - return map_.insert(x); - } - - iterator insert(const_iterator hint, const value_type& x) - { - write_lock lock(mutex_); - return map_.insert(hint, x); - } - - template - void insert(InputIterator first, InputIterator last) - { - write_lock lock(mutex_); - map_.insert(first, last); - } - - // C++11 specific - std::pair insert(value_type&& x) - { - write_lock lock(mutex_); - return map_.insert(HPX_FORWARD(value_type, x)); - } - - iterator insert(const_iterator hint, value_type&& x) - { - write_lock lock(mutex_); - return map_.insert(hint, HPX_FORWARD(value_type, x)); - } - - void insert(std::initializer_list il) - { - write_lock lock(mutex_); - map_.insert(il); - } - - // C++11 specific - template - std::pair emplace(Args&&... args) - { - write_lock lock(mutex_); - map_.emplace(HPX_FORWARD(Args, args)...); - } - - template - iterator emplace_hint(const_iterator hint, Args&&... args) - { - write_lock lock(mutex_); - return map_.emplace_hint(hint, HPX_FORWARD(Args, args)...); - } - - // modifiers - iterator erase(const_iterator position) - { - write_lock lock(mutex_); - return map_.erase(position); - } - - size_type erase(const key_type& k) - { - write_lock lock(mutex_); - return map_.erase(k); - } - - iterator erase(const_iterator first, const_iterator last) - { - write_lock lock(mutex_); - return map_.erase(first, last); - } - - void clear() - { - write_lock lock(mutex_); - map_.clear(); - } - - // - // size and capacity - // - bool empty() const - { - read_lock lock(mutex_); - return map_.empty(); - } - size_type size() const - { - read_lock lock(mutex_); - return map_.size(); - } - size_type max_size() const - { - read_lock lock(mutex_); - return map_.max_size(); - }; - - // - // iterators - not thread safe to access these - // obtain a read_lock before iterating, and release when done - // - iterator begin() - { - return map_.begin(); - } - const_iterator begin() const - { - return map_.begin(); - }; - iterator end() - { - return map_.end(); - }; - const_iterator end() const - { - return map_.end(); - }; - const_iterator cbegin() const - { - return map_.cbegin(); - }; - const_iterator cend() const - { - return map_.cbegin(); - }; - - // - // Before iterating over the map one must obtain a read lock, - // one may use this mutex to gain a lock as follows - // - // map_type::map_read_lock_type read_lock(map.read_write_mutex()); - // - rw_mutex_type& read_write_mutex() - { - return mutex_; - } - - // - // observers - // - hasher hash_function() const - { - return map_.hash_function(); - } - key_equal key_eq() const - { - return map_.key_eq(); - } - - // - // lookup - // - iterator find(const key_type& k) - { - read_lock lock(mutex_); - return map_.find(k); - } - - const_iterator find(const key_type& k) const - { - read_lock lock(mutex_); - return map_.find(k); - } - - std::pair is_in_map(const key_type& k) const - { - read_lock lock(mutex_); - const_iterator it = map_.find(k); - bool result = (it != map_.end()); - return std::make_pair(it, result); - ; - } - - size_type count(const key_type& k) const - { - read_lock lock(mutex_); - return map_.count(k); - } - - std::pair equal_range(const key_type& k) - { - read_lock lock(mutex_); - return map_.equal_range(k); - } - - std::pair equal_range( - const key_type& k) const - { - read_lock lock(mutex_); - return map_.equal_range(k); - }; - - // - // map operators - // - - // - // operator[] should only be used for reading, if writing, use insert - // which will take a write_lock for safety - // - const mapped_type& operator[](const key_type& k) const - { - read_lock lock(mutex_); - return map_.at(k); - }; - - mapped_type& at(const key_type& k) - { - read_lock lock(mutex_); - return map_.at(k); - }; - - const mapped_type& at(const key_type& k) const - { - read_lock lock(mutex_); - return map_.at(k); - }; - }; -}} // namespace hpx::concurrent diff --git a/libs/full/parcelport_libfabric/src/parcelport_libfabric.cpp b/libs/full/parcelport_libfabric/src/parcelport_libfabric.cpp deleted file mode 100644 index cc1d2291effd..000000000000 --- a/libs/full/parcelport_libfabric/src/parcelport_libfabric.cpp +++ /dev/null @@ -1,433 +0,0 @@ -// Copyright (c) 2015-2016 John Biddiscombe -// -// SPDX-License-Identifier: BSL-1.0 -// Distributed under the Boost Software License, Version 1.0. (See accompanying -// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - -// config -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -// The memory pool specialization need to be pulled in before encode_parcels -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -// -------------------------------------------------------------------- -// Controls whether we are allowed to suspend threads that are sending -// when we have maxed out the number of sends we can handle -#define HPX_PARCELPORT_LIBFABRIC_SUSPEND_WAKE \ - (HPX_PARCELPORT_LIBFABRIC_THROTTLE_SENDS / 2) - -// -------------------------------------------------------------------- -// Enable the use of hpx small_vector for certain short lived storage -// elements within the parcelport. This can reduce some memory allocations -#define HPX_PARCELPORT_LIBFABRIC_USE_SMALL_VECTOR true - -#if HPX_PARCELPORT_LIBFABRIC_USE_SMALL_VECTOR -#include -#endif - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -using namespace hpx::parcelset::policies; - -namespace hpx::parcelset::policies::libfabric { - // -------------------------------------------------------------------- - // parcelport, the implementation of the parcelport itself - // -------------------------------------------------------------------- - - // -------------------------------------------------------------------- - // Constructor : mostly just initializes the superclass with 'here' - // -------------------------------------------------------------------- - parcelport::parcelport(util::runtime_configuration const& ini, - threads::policies::callback_notifier const& notifier) - : base_type(ini, parcelset::locality(locality()), notifier) - , stopped_(false) - , completions_handled_(0) - , senders_in_use_(0) - { - FUNC_START_DEBUG_MSG; - - // if we are not enabled, then skip allocating resources - parcelport_enabled_ = hpx::util::get_entry_as( - ini, "hpx.parcel.libfabric.enable", 0); - LOG_DEBUG_MSG("Got enabled " << parcelport_enabled_); - - bootstrap_enabled_ = ("libfabric" == - hpx::util::get_entry_as( - ini, "hpx.parcel.bootstrap", "")); - LOG_DEBUG_MSG("Got bootstrap " << bootstrap_enabled_); - - if (!parcelport_enabled_) - return; - - // Get parameters that determine our fabric selection - std::string provider = ini.get_entry( - "hpx.parcel.libfabric.provider", HPX_PARCELPORT_LIBFABRIC_PROVIDER); - std::string domain = ini.get_entry( - "hpx.parcel.libfabric.domain", HPX_PARCELPORT_LIBFABRIC_DOMAIN); - std::string endpoint = ini.get_entry( - "hpx.parcel.libfabric.endpoint", HPX_PARCELPORT_LIBFABRIC_ENDPOINT); - - LOG_DEBUG_MSG("libfabric parcelport function using attributes " - << provider << " " << domain << " " << endpoint); - - // create our main fabric control structure - libfabric_controller_ = - std::make_shared(provider, domain, endpoint); - - // get 'this' locality from the controller - LOG_DEBUG_MSG("Getting local locality object"); - const locality& local = libfabric_controller_->here(); - here_ = parcelset::locality(local); - // and make a note of our ip address for convenience - ip_addr_ = local.ip_address(); - - FUNC_END_DEBUG_MSG; - } - - // -------------------------------------------------------------------- - // during bootup, this is used by the service threads - void parcelport::io_service_work() - { - while (hpx::is_starting()) - { - background_work(0, parcelport_background_mode::all); - } - LOG_DEBUG_MSG("io service task completed"); - } - - // -------------------------------------------------------------------- - // Start the handling of communication. - bool parcelport::do_run() - { - if (!parcelport_enabled_) - return false; - -#ifndef HPX_PARCELPORT_LIBFABRIC_HAVE_PMI - libfabric_controller_->initialize_localities(); -#endif - - FUNC_START_DEBUG_MSG; - libfabric_controller_->startup(this); - - LOG_DEBUG_MSG("Fetching memory pool"); - chunk_pool_ = &libfabric_controller_->get_memory_pool(); - - for (std::size_t i = 0; i < HPX_PARCELPORT_LIBFABRIC_THROTTLE_SENDS; - ++i) - { - sender* snd = new sender(this, libfabric_controller_->ep_active_, - libfabric_controller_->get_domain(), chunk_pool_); - snd->postprocess_handler_ = [this](sender* s) { - --senders_in_use_; - senders_.push(s); - trigger_pending_work(); - }; - senders_.push(snd); - } - - if (bootstrap_enabled_) - { - for (std::size_t i = 0; i != io_service_pool_.size(); ++i) - { - io_service_pool_.get_io_service(int(i)).post( - hpx::bind(&parcelport::io_service_work, this)); - } - } - return true; - } - - // -------------------------------------------------------------------- - // return a sender object back to the parcelport_impl - // this is used by the send_immediate version of parcelport_impl - // -------------------------------------------------------------------- - sender* parcelport::get_connection( - parcelset::locality const& dest, fi_addr_t& fi_addr) - { - sender* snd = nullptr; - if (senders_.pop(snd)) - { - FUNC_START_DEBUG_MSG; - const locality& fabric_locality = dest.get(); - LOG_DEBUG_MSG("get_fabric_address from " - << ipaddress(here_.get().ip_address()) << "to " - << ipaddress(fabric_locality.ip_address())); - ++senders_in_use_; - fi_addr = - libfabric_controller_->get_fabric_address(fabric_locality); - FUNC_END_DEBUG_MSG; - return snd; - } - // else if(threads::get_self_ptr()) - // // else if(this_thread::has_sufficient_stack_space()) - // { - // // background_work_OS_thread(); - // hpx::this_thread::suspend( - // hpx::threads::thread_schedule_state::pending_boost, - // "libfabric::parcelport::async_write"); - // } - - // if no senders are available shutdown - FUNC_END_DEBUG_MSG; - return nullptr; - } - - void parcelport::reclaim_connection(sender* s) - { - --senders_in_use_; - senders_.push(s); - } - - // -------------------------------------------------------------------- - // return a sender object back to the parcelport_impl - // this is for compatibility with non send_immediate operation - // -------------------------------------------------------------------- - std::shared_ptr parcelport::create_connection( - parcelset::locality const&, error_code&) - { - LOG_DEBUG_MSG("Creating new sender"); - return std::shared_ptr(); - } - - // -------------------------------------------------------------------- - // cleanup - parcelport::~parcelport() - { - FUNC_START_DEBUG_MSG; - scoped_lock lk(stop_mutex); - sender* snd = nullptr; - - unsigned int sends_posted = 0; - unsigned int sends_deleted = 0; - unsigned int acks_received = 0; - // - while (senders_.pop(snd)) - { - LOG_DEBUG_MSG("Popped a sender for delete " << hexpointer(snd)); - sends_posted += snd->sends_posted_; - sends_deleted += snd->sends_deleted_; - acks_received += snd->acks_received_; - delete snd; - } - LOG_DEBUG_MSG("sends_posted " - << decnumber(sends_posted) << "sends_deleted " - << decnumber(sends_deleted) << "acks_received " - << decnumber(acks_received) << "non_rma-send " - << decnumber(sends_posted - acks_received)); - // - libfabric_controller_ = nullptr; - FUNC_END_DEBUG_MSG; - } - - // -------------------------------------------------------------------- - /// Should not be used any more as parcelport_impl handles this? - bool parcelport::can_bootstrap() const - { - FUNC_START_DEBUG_MSG; - bool can_boot = HPX_PARCELPORT_LIBFABRIC_HAVE_BOOTSTRAPPING(); - LOG_TRACE_MSG("Returning " << can_boot << " from can_bootstrap") - FUNC_END_DEBUG_MSG; - return can_boot; - } - - // -------------------------------------------------------------------- - /// return a string form of the locality name - std::string parcelport::get_locality_name() const - { - FUNC_START_DEBUG_MSG; - // return hostname:iblibfabric ip address - std::stringstream temp; - temp << asio::ip::host_name() << ":" << ipaddress(ip_addr_); - std::string tstr = temp.str(); - FUNC_END_DEBUG_MSG; - return tstr.substr(0, tstr.size() - 1); - } - - // -------------------------------------------------------------------- - // the root node has spacial handling, this returns its Id - parcelset::locality parcelport::agas_locality( - util::runtime_configuration const&) const - { - FUNC_START_DEBUG_MSG; - // load all components as described in the configuration information - if (!bootstrap_enabled_) - { - LOG_ERROR_MSG( - "Should only return agas locality when bootstrapping"); - } - FUNC_END_DEBUG_MSG; - return parcelset::locality(libfabric_controller_->agas_); - } - - // -------------------------------------------------------------------- - parcelset::locality parcelport::create_locality() const - { - FUNC_START_DEBUG_MSG; - FUNC_END_DEBUG_MSG; - return parcelset::locality(locality()); - } - - // -------------------------------------------------------------------- - /// for debugging - void parcelport::suspended_task_debug(const std::string& match) - { - std::string temp = hpx::util::debug::suspended_task_backtraces(); - if (match.size() == 0 || temp.find(match) != std::string::npos) - { - LOG_DEBUG_MSG("Suspended threads " << temp); - } - } - - // -------------------------------------------------------------------- - /// stop the parcelport, prior to shutdown - void parcelport::do_stop() - { - LOG_DEBUG_MSG("Entering libfabric stop "); - FUNC_START_DEBUG_MSG; - if (!stopped_) - { - // we don't want multiple threads trying to stop the clients - scoped_lock lock(stop_mutex); - - LOG_DEBUG_MSG("Removing all initiated connections"); - libfabric_controller_->disconnect_all(); - - // wait for all clients initiated elsewhere to be disconnected - while (libfabric_controller_->active() /*&& !hpx::is_stopped()*/) - { - completions_handled_ += - libfabric_controller_->poll_endpoints(true); - LOG_TIMED_INIT(disconnect_poll); - LOG_TIMED_BLOCK(disconnect_poll, DEVEL, 5.0, - { LOG_DEBUG_MSG("Polling before shutdown"); }) - } - LOG_DEBUG_MSG("stopped removing clients and terminating"); - } - stopped_ = true; - // Stop receiving and sending of parcels - } - - // -------------------------------------------------------------------- - bool parcelport::can_send_immediate() - { - // hpx::util::yield_while([this]() - // { - // this->background_work(0); - // return this->senders_.empty(); - // }, "libfabric::can_send_immediate"); - - return true; - } - - // -------------------------------------------------------------------- - template - bool parcelport::async_write( - Handler&& handler, sender* snd, fi_addr_t addr, snd_buffer_type& buffer) - { - LOG_DEBUG_MSG( - "parcelport::async_write using sender " << hexpointer(snd)); - snd->dst_addr_ = addr; - snd->buffer_ = HPX_MOVE(buffer); - HPX_ASSERT(!snd->handler_); - snd->handler_ = HPX_FORWARD(Handler, handler); - snd->async_write_impl(); - // after a send poll to make progress on the network and - // reduce latencies for receives coming in - // background_work_OS_thread(); - // if (hpx::threads::get_self_ptr()) - // hpx::this_thread::suspend( - // hpx::threads::thread_schedule_state::pending_boost, - // "libfabric::parcelport::async_write"); - return true; - } - - // -------------------------------------------------------------------- - // This is called to poll for completions and handle all incoming messages - // as well as complete outgoing messages. - // - // Since the parcelport can be serviced by hpx threads or by OS threads, - // we must use extra care when dealing with mutexes and condition_variables - // since we do not want to suspend an OS thread, but we do want to suspend - // hpx threads when necessary. - // - // NB: There is no difference any more between background polling work - // on OS or HPX as all has been tested thoroughly - // -------------------------------------------------------------------- - inline bool parcelport::background_work_OS_thread() - { - LOG_TIMED_INIT(background); - bool done = false; - do - { - LOG_TIMED_BLOCK(background, DEVEL, 5.0, { - LOG_DEBUG_MSG( - "number of senders in use " << decnumber(senders_in_use_)); - }); - // if an event comes in, we may spend time processing/handling it - // and another may arrive during this handling, - // so keep checking until none are received - // libfabric_controller_->refill_client_receives(false); - int numc = libfabric_controller_->poll_endpoints(); - completions_handled_ += numc; - done = (numc == 0); - } while (!done); - return (done != 0); - } - - // -------------------------------------------------------------------- - // Background work - // - // This is called whenever the main thread scheduler is idling, - // is used to poll for events, messages on the libfabric connection - // -------------------------------------------------------------------- - bool parcelport::background_work(std::size_t, parcelport_background_mode) - { - if (stopped_ || hpx::is_stopped()) - { - return false; - } - return background_work_OS_thread(); - } -} // namespace hpx::parcelset::policies::libfabric - -HPX_REGISTER_PARCELPORT( - hpx::parcelset::policies::libfabric::parcelport, libfabric) diff --git a/libs/full/parcelport_libfabric/src/receiver.cpp b/libs/full/parcelport_libfabric/src/receiver.cpp deleted file mode 100644 index 6ced801dc0be..000000000000 --- a/libs/full/parcelport_libfabric/src/receiver.cpp +++ /dev/null @@ -1,201 +0,0 @@ -// Copyright (c) 2015-2017 John Biddiscombe -// Copyright (c) 2017 Thomas Heller -// -// SPDX-License-Identifier: BSL-1.0 -// Distributed under the Boost Software License, Version 1.0. (See accompanying -// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - -#include -#include - -#include -#include - -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include -#include - -namespace hpx::parcelset::policies::libfabric { - - // -------------------------------------------------------------------- - receiver::receiver(parcelport* pp, fid_ep* endpoint, - rma_memory_pool& memory_pool) - : pp_(pp) - , endpoint_(endpoint) - , header_region_( - memory_pool.allocate_region(memory_pool.small_.chunk_size())) - , memory_pool_(&memory_pool) - , messages_handled_(0) - , acks_received_(0) - , active_receivers_(0) - { - LOG_DEBUG_MSG("created receiver: " << hexpointer(this)); - // Once constructed, we need to post the receive... - pre_post_receive(); - } - - // these constructors are provided because hpx::lockfree::stack requires them - // they should not be used - receiver::receiver(receiver&&) - : active_receivers_(0) - { - std::terminate(); - } - receiver& receiver::operator=(receiver&&) - { - std::terminate(); - } - - // -------------------------------------------------------------------- - receiver::~receiver() - { - if (header_region_ && memory_pool_) - { - memory_pool_->deallocate(header_region_); - } - // this is safe to call twice - it might have been called already - // to collect counter information by the fabric controller - cleanup(); - } - - // -------------------------------------------------------------------- - void receiver::cleanup() - { - rma_receiver* rcv = nullptr; - while (receiver::rma_receivers_.pop(rcv)) - { - msg_plain_ += rcv->msg_plain_; - msg_rma_ += rcv->msg_rma_; - sent_ack_ += rcv->sent_ack_; - rma_reads_ += rcv->rma_reads_; - recv_deletes_ += rcv->recv_deletes_; - delete rcv; - } - } - - // -------------------------------------------------------------------- - // when a receive completes, this callback handler is called - void receiver::handle_recv(fi_addr_t const& src_addr, std::uint64_t len) - { - FUNC_START_DEBUG_MSG; - static_assert(sizeof(std::uint64_t) == sizeof(std::size_t), - "sizeof(std::uint64_t) != sizeof(std::size_t)"); - - // If we receive a message of 8 bytes, we got a tag and need to handle - // the tag completion... - if (len <= sizeof(std::uint64_t)) - { - // @TODO: fixme immediate tag retrieval - // Get the sender that has completed rma operations and signal to it - // that it can now cleanup - all remote get operations are done. - sender* snd = - *reinterpret_cast(header_region_->get_address()); - pre_post_receive(); - LOG_DEBUG_MSG("Handling sender tag (RMA ack) completion: " - << hexpointer(snd)); - ++acks_received_; - snd->handle_message_completion_ack(); - return; - } - - LOG_DEBUG_MSG("Handling message"); - rma_receiver* recv = nullptr; - if (!receiver::rma_receivers_.pop(recv)) - { - auto f = [this](rma_receiver* recv) { - --active_receivers_; - if (!receiver::rma_receivers_.push(recv)) - { - // if the capacity overflowed, just delete this one - delete recv; - } - // Notify one possibly waiting receiver that one receive just - // finished - if (threads::threadmanager_is_at_least(hpx::state::running) && - hpx::threads::get_self_ptr()) - { - std::unique_lock l(active_receivers_mtx_); - active_receivers_cv_.notify_one(HPX_MOVE(l)); - } - }; - // throttle the creation of new receivers. Wait until the - // active_receivers_count drops below the maximum. This can not be - // a busy wait since it could potentially block all background - // threads. - const long max_receivers = HPX_PARCELPORT_LIBFABRIC_MAX_PREPOSTS; - if (threads::threadmanager_is_at_least(hpx::state::running) && - hpx::threads::get_self_ptr()) - { - while (active_receivers_ > max_receivers) - { - std::unique_lock l(active_receivers_mtx_); - active_receivers_cv_.wait(l); - } - } - - recv = new rma_receiver(pp_, endpoint_, memory_pool_, HPX_MOVE(f)); - } - ++active_receivers_; - - HPX_ASSERT(recv); - - // We save the received region and swap it with a newly allocated one - // so that we can post a recv again as soon as possible. - region_type* region = header_region_; - header_region_ = - memory_pool_->allocate_region(memory_pool_->small_.chunk_size()); - pre_post_receive(); - - // we dispatch our work to our rma_receiver once it completed the - // prior message. The saved region is passed to the rma handler - ++messages_handled_; - recv->read_message(region, src_addr); - - FUNC_END_DEBUG_MSG; - } - - // -------------------------------------------------------------------- - void receiver::pre_post_receive() - { - FUNC_START_DEBUG_MSG; - void* desc = header_region_->get_desc(); - LOG_DEBUG_MSG("Pre-Posting receive " << *header_region_ << "context " - << hexpointer(this)); - - hpx::util::yield_while( - [this, desc]() { - // post a receive using 'this' as the context, so that this - // receiver object can be used to handle the incoming - // receive/request - int ret = fi_recv(this->endpoint_, - this->header_region_->get_address(), - this->header_region_->get_size(), desc, 0, this); - - if (ret == -FI_EAGAIN) - { - LOG_ERROR_MSG("reposting fi_recv\n"); - return true; - } - else if (ret != 0) - { - throw fabric_error(ret, "pp_post_rx"); - } - - return false; - }, - "libfabric::receiver::post_recv"); - - FUNC_END_DEBUG_MSG; - } -} // namespace hpx::parcelset::policies::libfabric diff --git a/libs/full/parcelport_libfabric/src/rma_receiver.cpp b/libs/full/parcelport_libfabric/src/rma_receiver.cpp deleted file mode 100644 index 5782978777e1..000000000000 --- a/libs/full/parcelport_libfabric/src/rma_receiver.cpp +++ /dev/null @@ -1,576 +0,0 @@ -// Copyright (c) 2015-2017 John Biddiscombe -// Copyright (c) 2017 Thomas Heller -// -// SPDX-License-Identifier: BSL-1.0 -// Distributed under the Boost Software License, Version 1.0. (See accompanying -// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - -#include -#include - -#include -#include -#include - -#include -#include - -#include -#include -#include -#include -#include - -namespace hpx::parcelset::policies::libfabric { - // -------------------------------------------------------------------- - rma_receiver::rma_receiver(parcelport* pp, fid_ep* endpoint, - rma_memory_pool* memory_pool, - completion_handler&& handler) - : pp_(pp) - , endpoint_(endpoint) - , header_region_(nullptr) - , chunk_region_(nullptr) - , message_region_(nullptr) - , header_(nullptr) - , memory_pool_(memory_pool) - , handler_(HPX_MOVE(handler)) - , rma_count_(0) - , chunk_fetch_(false) - { - } - - // -------------------------------------------------------------------- - rma_receiver::~rma_receiver() - { - LOG_DEBUG_MSG("Receiving of message complete " << hexpointer(this)); - } - - // -------------------------------------------------------------------- - void rma_receiver::read_message( - region_type* region, fi_addr_t const& src_addr) - { - start_time_ = hpx::chrono::high_resolution_clock::now(); - HPX_ASSERT(rma_count_ == 0); - HPX_ASSERT(header_ == nullptr); - HPX_ASSERT(header_region_ == nullptr); - HPX_ASSERT(chunk_region_ == nullptr); - HPX_ASSERT(message_region_ == nullptr); - HPX_ASSERT(rma_regions_.size() == 0); - HPX_ASSERT(chunk_fetch_ == false); - - // where this message came from - src_addr_ = src_addr; - - // the region posted as a receive contains the received header - header_region_ = region; - header_ = reinterpret_cast(header_region_->get_address()); - - HPX_ASSERT(header_); - HPX_ASSERT(header_region_->get_address()); - - LOG_DEBUG_MSG( - "receiver " << hexpointer(this) << "Header : " << *header_); - - LOG_TRACE_MSG(CRC32_MEM( - header_, header_->header_length(), "Header region (recv)")); - - if (header_->chunk_ptr() == nullptr) - { - // the header does not have piggybacked chunks, we must rma-get them before - // we can decode the message, they may need further rma-get operations - handle_message_no_chunk_data(); - return; - } - - // how many RMA operations are needed - rma_count_ = header_->num_zero_copy_chunks(); - - LOG_DEBUG_MSG("receiver " << hexpointer(this) << "is expecting " - << decnumber(rma_count_) - << "read completions"); - - // If we have no zero copy chunks and piggy backed data, we can - // process the message immediately, otherwise, dispatch to receiver - // If we have neither piggy back, nor zero copy chunks, rma_count is 0 - if (rma_count_ == 0) - { - handle_message_no_rma(); - ++msg_plain_; - } - else - { - handle_message_with_zerocopy_rma(); - ++msg_rma_; - } - } - - // -------------------------------------------------------------------- - void rma_receiver::handle_message_no_rma() - { - HPX_ASSERT(header_); - LOG_DEBUG_MSG("receiver " - << hexpointer(this) - << "handle piggy backed send without zero copy regions"); - - char* piggy_back = header_->message_data(); - HPX_ASSERT(piggy_back); - - LOG_TRACE_MSG(CRC32_MEM(piggy_back, header_->message_size(), - "(Message region recv piggybacked - no rdma)")); - - typedef pinned_memory_vector - rcv_data_type; - typedef parcel_buffer> rcv_buffer_type; - - // when parcel decoding from the wrapped pointer buffer has completed, - // the lambda function will be called - rcv_data_type wrapped_pointer( - piggy_back, header_->message_size(), []() {}, nullptr, nullptr); - - rcv_buffer_type buffer(HPX_MOVE(wrapped_pointer), nullptr); - - int zc_chunks = - std::count_if(chunks_.begin(), chunks_.end(), [](chunk_struct& c) { - return c.type_ == serialization::chunk_type::chunk_type_pointer; - }); - int oo_chunks = chunks_.size() - zc_chunks; - - buffer.num_chunks_ = std::make_pair(zc_chunks, oo_chunks); - buffer.data_size_ = header_->message_size(); -#if defined(HPX_HAVE_PARCELPORT_COUNTERS) - parcelset::data_point& data = buffer.data_point_; - data.bytes_ = static_cast(header_->message_size()); - data.time_ = hpx::chrono::high_resolution_clock::now() - start_time_; -#endif - LOG_DEBUG_MSG("receiver " - << hexpointer(this) - << "calling parcel decode for complete NORMAL parcel"); - std::size_t num_thread = hpx::get_worker_thread_num(); - std::vector parcels = decode_message_with_chunks( - *pp_, HPX_MOVE(buffer), 0, chunks_, num_thread); - handle_received_parcels(HPX_MOVE(parcels), num_thread); - - LOG_DEBUG_MSG("receiver " - << hexpointer(this) - << "parcel decode called for complete NORMAL (small) parcel"); - - cleanup_receive(); - } - - // -------------------------------------------------------------------- - void rma_receiver::handle_message_with_zerocopy_rma() - { - chunks_.resize(header_->num_chunks()); - char* chunk_data = header_->chunk_data(); - HPX_ASSERT(chunk_data); - - size_t chunkbytes = chunks_.size() * sizeof(chunk_struct); - - std::memcpy(chunks_.data(), chunk_data, chunkbytes); - LOG_DEBUG_MSG("receiver " << hexpointer(this) - << "Copied chunk data from header : size " - << decnumber(chunkbytes)); - - LOG_EXCLUSIVE(for (const chunk_struct& c - : chunks_) { - LOG_DEBUG_MSG("receiver " - << hexpointer(this) << "recv : chunk : size " - << hexnumber(c.size_) << " type " - << decnumber((uint64_t) c.type_) << " rkey " - << hexpointer(c.rkey_) << " cpos " << hexpointer(c.data_.cpos_) - << " index " << decnumber(c.data_.index_)); - }); - - rma_regions_.reserve(header_->num_zero_copy_chunks()); - - // for each zerocopy chunk, schedule a read operation - read_chunk_list(); - } - - // -------------------------------------------------------------------- - void rma_receiver::handle_message_no_chunk_data() - { - chunk_fetch_ = true; - // get the remote chunk block memory region details - auto& cb = header_->chunk_header_ptr()->chunk_rma; - LOG_DEBUG_MSG("receiver " - << hexpointer(this) << "Fetching RMA chunk chunk data with " - << "size " << decnumber(cb.size_) << "rkey " << hexpointer(cb.rkey_) - << "addr " << hexpointer(cb.data_.cpos_)); - - // we need a local memory region to read the chunks into - chunk_region_ = memory_pool_->allocate_region(cb.size_); - chunk_region_->set_message_length(cb.size_); - uint64_t rkey1 = cb.rkey_; - const void* remoteAddr1 = cb.data_.cpos_; - // add it to the list of rma regions to fetch - rma_regions_.push_back(chunk_region_); - LOG_DEBUG_MSG("receiver " << hexpointer(this) - << "Fetching chunk region with size " - << decnumber(cb.size_)); - rma_count_ = 1; - - // if the message region requires rma-get, we can get it now as well - if (!header_->message_piggy_back()) - { - auto& mc = header_->message_chunk_ptr()->message_rma; - message_region_ = memory_pool_->allocate_region(mc.size_); - message_region_->set_message_length(mc.size_); - uint64_t rkey2 = mc.rkey_; - const void* remoteAddr2 = mc.data_.cpos_; - // add it to the list of rma regions to fetch - rma_regions_.push_back(message_region_); - LOG_DEBUG_MSG("receiver " << hexpointer(this) - << "Fetching message region with size " - << decnumber(mc.size_)); - ++rma_count_; - // call the rma read function for the message chunk - read_one_chunk(src_addr_, message_region_, remoteAddr2, rkey2); - } - - // call the rma read function for the chunk-info chunk - // we do this last so that a completion does not come before the message - // chunk read has been triggered - this would cause the rma receive to decrement - // the counter and might hit zero before we had sent the second request - read_one_chunk(src_addr_, chunk_region_, remoteAddr1, rkey1); - } - - // -------------------------------------------------------------------- - int rma_receiver::handle_chunks_read_message() - { - char* chunk_data = chunk_region_->get_address(); - HPX_ASSERT(chunk_data); - // - uint64_t chunkbytes = chunk_region_->get_message_length(); - uint64_t num_chunks = chunkbytes / sizeof(chunk_struct); - chunks_.resize(num_chunks); - std::memcpy(chunks_.data(), chunk_data, chunkbytes); - LOG_DEBUG_MSG("receiver " - << hexpointer(this) << "Copied chunk data from chunk_region: size " - << decnumber(chunkbytes) << "with num chunks " - << decnumber(num_chunks)); - // - rma_regions_.clear(); - chunk_fetch_ = false; - // for each zerocopy chunk, schedule a read operation - uint64_t zc_count = - std::count_if(chunks_.begin(), chunks_.end(), [](chunk_struct& c) { - return c.type_ == serialization::chunk_type::chunk_type_pointer; - }); - LOG_DEBUG_MSG("receiver " << hexpointer(this) - << "Restarting RMA reads with " - << decnumber(rma_count_) << "chunks"); - rma_count_ = zc_count; - // perform an rma read for each zero copy chunk - read_chunk_list(); - // do not return rma_count_ as it might already have decremented! (racey) - HPX_ASSERT(rma_regions_.size() == zc_count); - return rma_regions_.size(); - } - - // -------------------------------------------------------------------- - void rma_receiver::read_chunk_list() - { - for (chunk_struct& c : chunks_) - { - if (c.type_ == serialization::chunk_type::chunk_type_pointer) - { - region_type* get_region = - memory_pool_->allocate_region(c.size_); - LOG_TRACE_MSG(CRC32_MEM(get_region->get_address(), c.size_, - "(RDMA GET region (new))")); - - rma_regions_.push_back(get_region); - get_region->set_message_length(c.size_); - - // overwrite the serialization chunk data pointer because the chunk - // info sent contains the pointer to the remote data and when we - // decode the parcel we want the chunk to point to the local copy of it - const void* remoteAddr = c.data_.cpos_; - c.data_.cpos_ = get_region->get_address(); - // call the rma read function for the chunk - read_one_chunk(src_addr_, get_region, remoteAddr, c.rkey_); - } - } - } - - // -------------------------------------------------------------------- - void rma_receiver::read_one_chunk(fi_addr_t, region_type* get_region, - const void* remoteAddr, uint64_t rkey) - { - // post the rdma read/get - LOG_DEBUG_MSG("rma_receiver " - << hexpointer(this) << "RDMA Get fi_read :" - << "client " << hexpointer(endpoint_) << "fi_addr " - << hexpointer(src_addr_) << "tag " << hexuint64(header_->tag()) - << "local addr " << hexpointer(get_region->get_address()) - << "local desc " << hexpointer(get_region->get_desc()) << "size " - << hexlength(get_region->get_message_length()) << "rkey " - << hexpointer(rkey) << "remote cpos " << hexpointer(remoteAddr)); - - // count reads - ++rma_reads_; - - hpx::util::yield_while( - [this, get_region, remoteAddr, rkey]() { - LOG_EXCLUSIVE( - // write a pattern and dump out data for debugging purposes - uint32_t* buffer = - reinterpret_cast(get_region->get_address()); - std::fill(buffer, buffer + get_region->get_size() / 4, - 0xDEADC0DE); - LOG_TRACE_MSG(CRC32_MEM(get_region->get_address(), - get_region->get_size(), - "(RDMA GET region (pre-fi_read))"));); - - ssize_t ret = fi_read(endpoint_, get_region->get_address(), - get_region->get_message_length(), get_region->get_desc(), - src_addr_, (uint64_t)(remoteAddr), rkey, this); - - if (ret == -FI_EAGAIN) - { - LOG_ERROR_MSG("receiver " << hexpointer(this) - << "reposting fi_read...\n"); - return true; - } - else if (ret) - { - throw fabric_error(ret, "fi_read"); - } - - return false; - }, - "libfabric::receiver::async_read"); - } - - // -------------------------------------------------------------------- - // whenever and rma read completion event occurs, this function is entered. - // an atomic counter, counts down with each completion, until we hit zero, - // when all expected read results are available. - void rma_receiver::handle_rma_read_completion() - { - FUNC_START_DEBUG_MSG; - HPX_ASSERT(rma_count_ > 0); - // If we haven't read all chunks, we can return and wait - // for the other incoming read completions - if (--rma_count_ > 0) - { - LOG_DEBUG_MSG("receiver " << hexpointer(this) - << "Not yet read all RMA regions " - << hexpointer(this)); - FUNC_START_DEBUG_MSG; - return; - } - - HPX_ASSERT(rma_count_ == 0); - - // when the chunk structure could not be piggybacked, the chunk_fetch_ - // flag is set prior to reading the chunks. - if (chunk_fetch_) - { - LOG_DEBUG_MSG("receiver " << hexpointer(this) - << "rma read chunk list complete"); - if (handle_chunks_read_message() > 0) - { - // more rma reads have been started, so exit and wait for them - return; - } - } - else - { - LOG_DEBUG_MSG( - "receiver " << hexpointer(this) << "all RMA regions now read "); - } - - // If the main message was not piggy backed, then the message region - // is either the final chunk of the rma list (if chunks were piggybacked) - // or read via rma during the chunk fetch (chunks not piggybacked) - if (!header_->message_piggy_back()) - { - if (header_->chunk_ptr()) - { - message_region_ = rma_regions_.back(); - // - rma_regions_.resize(rma_regions_.size() - 1); - chunks_.resize(chunks_.size() - 1); - } - else - { - LOG_DEBUG_MSG("No piggy back message or chunks"); - // message region should have been read by handle_message_no_chunk_data - HPX_ASSERT(message_region_); - } - } - - std::size_t message_length = header_->message_size(); - char* message = nullptr; - if (message_region_) - { - message = static_cast(message_region_->get_address()); - HPX_ASSERT(message); - LOG_DEBUG_MSG("receiver " - << hexpointer(this) << "No piggy_back RDMA message " - << "region " << hexpointer(message_region_) << "address " - << hexpointer(message_region_->get_address()) << "length " - << hexuint32(message_length)); - LOG_TRACE_MSG(CRC32_MEM( - message, message_length, "Message region (recv rdma)")); - - // do this after dumping out data as otherwise we lose some debug info - HPX_ASSERT(message_region_->get_message_length() == - header_->message_size()); - } - else - { - HPX_ASSERT(header_->message_data()); - message = header_->message_data(); - LOG_TRACE_MSG(CRC32_MEM(message, message_length, - "Message region (recv piggyback with rdma)")); - } - - for (auto& r : rma_regions_) - { - (void) r; - LOG_TRACE_MSG(CRC32_MEM(r->get_address(), r->get_message_length(), - "rdma region (recv) ")); - } - - // wrap the message and chunks into a pinned vector so that they - // can be passed into the parcel decode functions and when released have - // the pinned buffers returned to the memory pool - typedef pinned_memory_vector - rcv_data_type; - typedef parcel_buffer> rcv_buffer_type; - - rcv_data_type wrapped_pointer( - message, message_length, - [this, message, message_length]() { - // deleted cleanup until all problems resolved, now after parcel decode - if (message_region_) - { - LOG_TRACE_MSG(CRC32_MEM(message, message_length, - "Message region (receiver delete)")); - } - }, - nullptr, nullptr); - - rcv_buffer_type buffer(HPX_MOVE(wrapped_pointer), nullptr); - - LOG_EXCLUSIVE(for (chunk_struct& c - : chunks_) { - LOG_DEBUG_MSG("get : chunk : size " - << hexnumber(c.size_) << " type " - << decnumber((uint64_t) c.type_) << " rkey " - << hexpointer(c.rkey_) << " cpos " << hexpointer(c.data_.cpos_) - << " index " << decnumber(c.data_.index_)); - }); - - int zc_chunks = - std::count_if(chunks_.begin(), chunks_.end(), [](chunk_struct& c) { - return c.type_ == serialization::chunk_type::chunk_type_pointer; - }); - int oo_chunks = chunks_.size() - zc_chunks; - - buffer.num_chunks_ = std::make_pair(zc_chunks, oo_chunks); - buffer.data_size_ = header_->message_size(); -#if defined(HPX_HAVE_PARCELPORT_COUNTERS) - parcelset::data_point& data = buffer.data_point_; - data.bytes_ = static_cast(header_->message_size()); - data.time_ = hpx::chrono::high_resolution_clock::now() - start_time_; -#endif - LOG_DEBUG_MSG("receiver " - << hexpointer(this) - << "calling parcel decode for ZEROCOPY complete parcel"); - std::size_t num_thread = hpx::get_worker_thread_num(); - std::vector parcels = decode_message_with_chunks( - *pp_, HPX_MOVE(buffer), 0, chunks_, num_thread); - handle_received_parcels(HPX_MOVE(parcels), num_thread); - - LOG_DEBUG_MSG("receiver " - << hexpointer(this) - << "parcel decode called for ZEROCOPY complete parcel"); - - LOG_DEBUG_MSG("receiver " << hexpointer(this) << "Sending ack"); - send_rdma_complete_ack(); - - cleanup_receive(); - FUNC_END_DEBUG_MSG; - } - - // -------------------------------------------------------------------- - void rma_receiver::send_rdma_complete_ack() - { - LOG_DEBUG_MSG("receiver " - << hexpointer(this) << "RDMA Get tag " << hexuint64(header_->tag()) - << " has completed : posting 8 byte ack to origin"); - - ++sent_ack_; - - hpx::util::yield_while( - [this]() { - // when we received the incoming message, the tag was already set - // with the sender context so that we can signal it directly - // that we have completed RMA and the sender my now cleanup. - std::uint64_t tag = this->header_->tag(); - int ret = fi_inject(this->endpoint_, &tag, - sizeof(std::uint64_t), this->src_addr_); - - if (ret == -FI_EAGAIN) - { - LOG_ERROR_MSG("receiver " << hexpointer(this) - << "reposting fi_inject...\n"); - return true; - } - else if (ret) - { - throw fabric_error(ret, "fi_inject ack notification error"); - } - - return false; - }, - "libfabric::receiver::send_rdma_complete_ack"); - } - - // -------------------------------------------------------------------- - void rma_receiver::cleanup_receive() - { - LOG_DEBUG_MSG("cleanup for receiver rma " << hexpointer(this)); - // - HPX_ASSERT(rma_count_ == 0); - // - ++recv_deletes_; - // - memory_pool_->deallocate(header_region_); - header_region_ = nullptr; - chunk_region_ = nullptr; - header_ = nullptr; - src_addr_ = 0; - // - if (message_region_) - { - memory_pool_->deallocate(message_region_); - message_region_ = nullptr; - } - // - for (auto region : rma_regions_) - { - memory_pool_->deallocate(region); - } - rma_regions_.clear(); - chunks_.clear(); - // - LOG_DEBUG_MSG("receiver " - << hexpointer(this) - << "Cleaned up, posting self back to rma stack"); - handler_(this); - } - - // -------------------------------------------------------------------- - void rma_receiver::handle_error(struct fi_cq_err_entry) - { - LOG_ERROR_MSG("rma_receiver handling an error (unimplemented)" - << hexpointer(this)); - } -} // namespace hpx::parcelset::policies::libfabric diff --git a/libs/full/parcelport_libfabric/src/sender.cpp b/libs/full/parcelport_libfabric/src/sender.cpp deleted file mode 100644 index 49e940b3e74b..000000000000 --- a/libs/full/parcelport_libfabric/src/sender.cpp +++ /dev/null @@ -1,335 +0,0 @@ -// Copyright (c) 2015-2017 John Biddiscombe -// Copyright (c) 2017 Thomas Heller -// -// SPDX-License-Identifier: BSL-1.0 -// Distributed under the Boost Software License, Version 1.0. (See accompanying -// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include - -namespace hpx::parcelset::policies::libfabric { - // -------------------------------------------------------------------- - // The main message send routine - void sender::async_write_impl() - { -#if defined(HPX_HAVE_PARCELPORT_COUNTERS) - buffer_.data_point_.time_ = hpx::chrono::high_resolution_clock::now(); -#endif - HPX_ASSERT(message_region_ == nullptr); - HPX_ASSERT(completion_count_ == 0); - // increment counter of total messages sent - ++sends_posted_; - - // for each zerocopy chunk, we must create a memory region for the data - // do this before creating the header as the chunk details will be copied - // into the header space - int index = 0; - for (auto& c : buffer_.chunks_) - { - // Debug only, dump out the chunk info - LOG_DEBUG_MSG("write : chunk : size " - << hexnumber(c.size_) << " type " - << decnumber((uint64_t) c.type_) << " rkey " - << hexpointer(c.rkey_) << " cpos " << hexpointer(c.data_.cpos_) - << " index " << decnumber(c.data_.index_)); - if (c.type_ == serialization::chunk_type::chunk_type_pointer) - { - LOG_EXCLUSIVE(chrono::high_resolution_timer regtimer); - - // create a new memory region from the user supplied pointer - region_type* zero_copy_region = - new region_type(domain_, c.data_.cpos_, c.size_); - - rma_regions_.push_back(zero_copy_region); - - // set the region remote access key in the chunk space - c.rkey_ = zero_copy_region->get_remote_key(); - LOG_DEBUG_MSG("Time to register memory (ns) " - << decnumber(regtimer.elapsed_nanoseconds())); - LOG_DEBUG_MSG("Created zero-copy rdma Get region " - << decnumber(index) << *zero_copy_region << "for rkey " - << hexpointer(c.rkey_)); - - LOG_TRACE_MSG(CRC32_MEM(zero_copy_region->get_address(), - zero_copy_region->get_message_length(), - "zero_copy_region (pre-send) ")); - } - ++index; - } - - // create the header using placement new in the pinned memory block - char* header_memory = (char*) (header_region_->get_address()); - - LOG_DEBUG_MSG("Placement new for header"); - header_ = new (header_memory) header_type(buffer_, this); - header_region_->set_message_length(header_->header_length()); - - LOG_DEBUG_MSG("sender " - << hexpointer(this) << ", buffsize " - << hexuint32(header_->message_size()) << ", header_length " - << decnumber(header_->header_length()) << ", chunks zerocopy( " - << decnumber(buffer_.num_chunks_.first) << ") " - << ", normal( " << decnumber(buffer_.num_chunks_.second) << ") " - << ", chunk_flag " << decnumber(header_->header_length()) - << ", tag " << hexuint64(header_->tag())); - - // reserve some space for zero copy information - rma_regions_.reserve(buffer_.num_chunks_.first); - - // Get the block of pinned memory where the message was encoded - // during serialization - message_region_ = buffer_.data_.m_region_; - message_region_->set_message_length(header_->message_size()); - - HPX_ASSERT(header_->message_size() == buffer_.data_.size()); - LOG_DEBUG_MSG("Found region allocated during encode_parcel : address " - << hexpointer(buffer_.data_.m_array_) << " region " - << *message_region_); - - // The number of completions we need before cleaning up: - // 1 (header block send) + 1 (ack message if we have RMA chunks) - completion_count_ = 1; - region_list_[0] = {header_region_->get_address(), - header_region_->get_message_length()}; - region_list_[1] = {message_region_->get_address(), - message_region_->get_message_length()}; - - desc_[0] = header_region_->get_desc(); - desc_[1] = message_region_->get_desc(); - if (rma_regions_.size() > 0 || !header_->message_piggy_back()) - { - completion_count_ = 2; - } - - if (header_->chunk_data()) - { - LOG_DEBUG_MSG( - "Sender " << hexpointer(this) << "Chunk info is piggybacked"); - } - else - { - LOG_DEBUG_MSG("Setting up header-chunk rma data with " - << "zero-copy chunks " << decnumber(rma_regions_.size())); - auto& cb = header_->chunk_header_ptr()->chunk_rma; - chunk_region_ = memory_pool_->allocate_region(cb.size_); - cb.data_.pos_ = chunk_region_->get_address(); - cb.rkey_ = chunk_region_->get_remote_key(); - std::memcpy(cb.data_.pos_, buffer_.chunks_.data(), cb.size_); - LOG_DEBUG_MSG("Set up header-chunk rma data with " - << "size " << decnumber(cb.size_) << "rkey " - << hexpointer(cb.rkey_) << "addr " - << hexpointer(cb.data_.cpos_)); - } - - if (header_->message_piggy_back()) - { - LOG_DEBUG_MSG( - "Sender " << hexpointer(this) << "Main message is piggybacked"); - - LOG_TRACE_MSG(CRC32_MEM(header_region_->get_address(), - header_region_->get_message_length(), - "Header region (send piggyback)")); - - LOG_TRACE_MSG(CRC32_MEM(message_region_->get_address(), - message_region_->get_message_length(), - "Message region (send piggyback)")); - - // send 2 regions as one message, goes into one receive - hpx::util::yield_while( - [this]() { - int ret = fi_sendv(this->endpoint_, this->region_list_, - this->desc_, 2, this->dst_addr_, this); - - if (ret == -FI_EAGAIN) - { - LOG_ERROR_MSG("reposting fi_sendv...\n"); - return true; - } - else if (ret) - { - throw fabric_error(ret, "fi_sendv"); - } - - return false; - }, - "sender::async_write"); - } - else - { - header_->set_message_rdma_info(message_region_->get_remote_key(), - message_region_->get_address()); - - LOG_DEBUG_MSG("Sender " - << hexpointer(this) << "message region NOT piggybacked " - << hexnumber(buffer_.data_.size()) << *message_region_); - - LOG_TRACE_MSG(CRC32_MEM(header_region_->get_address(), - header_region_->get_message_length(), - "Header region (pre-send)")); - - LOG_TRACE_MSG(CRC32_MEM(message_region_->get_address(), - message_region_->get_message_length(), - "Message region (send for rdma fetch)")); - - // send just the header region - a single message - hpx::util::yield_while( - [this]() { - int ret = - fi_send(this->endpoint_, this->region_list_[0].iov_base, - this->region_list_[0].iov_len, this->desc_[0], - this->dst_addr_, this); - - if (ret == -FI_EAGAIN) - { - LOG_ERROR_MSG("reposting fi_send...\n"); - return true; - } - else if (ret) - { - throw fabric_error(ret, "fi_sendv"); - } - - return false; - }, - "sender::async_write"); - } - - FUNC_END_DEBUG_MSG; - } - - // -------------------------------------------------------------------- - void sender::handle_send_completion() - { - LOG_DEBUG_MSG("Sender " - << hexpointer(this) << "handle send_completion " - << "RMA regions " << decnumber(rma_regions_.size()) - << "completion count " << decnumber(completion_count_)); - cleanup(); - } - - // -------------------------------------------------------------------- - void sender::handle_message_completion_ack() - { - LOG_DEBUG_MSG("Sender " - << hexpointer(this) << "handle handle_message_completion_ack ( " - << "RMA regions " << decnumber(rma_regions_.size()) - << "completion count " << decnumber(completion_count_)); - ++acks_received_; - cleanup(); - } - - // -------------------------------------------------------------------- - void sender::cleanup() - { - LOG_DEBUG_MSG("Sender " << hexpointer(this) - << "decrementing completion_count from " - << decnumber(completion_count_)); - - // if we need to wait for more completion events, return without cleaning - if (--completion_count_ > 0) - return; - - // track deletions - ++sends_deleted_; - - error_code ec; - handler_(ec); - handler_.reset(); - - // cleanup header and message region - memory_pool_->deallocate(message_region_); - message_region_ = nullptr; - header_ = nullptr; - // cleanup chunk region - if (chunk_region_) - { - memory_pool_->deallocate(chunk_region_); - chunk_region_ = nullptr; - } - - for (auto& region : rma_regions_) - { - memory_pool_->deallocate(region); - } - rma_regions_.clear(); -#if defined(HPX_HAVE_PARCELPORT_COUNTERS) - buffer_.data_point_.time_ = hpx::chrono::high_resolution_clock::now() - - buffer_.data_point_.time_; - parcelport_->add_sent_data(buffer_.data_point_); -#endif - postprocess_handler_(this); - } - - // -------------------------------------------------------------------- - void sender::handle_error(struct fi_cq_err_entry) - { - LOG_ERROR_MSG("resending message after error " << hexpointer(this)); - - if (header_->message_piggy_back()) - { - // send 2 regions as one message, goes into one receive - hpx::util::yield_while( - [this]() { - int ret = fi_sendv(this->endpoint_, this->region_list_, - this->desc_, 2, this->dst_addr_, this); - - if (ret == -FI_EAGAIN) - { - LOG_ERROR_MSG("reposting fi_sendv...\n"); - return true; - } - else if (ret) - { - throw fabric_error(ret, "fi_sendv"); - } - - return false; - }, - "libfabric::sender::handle_error"); - } - else - { - header_->set_message_rdma_info(message_region_->get_remote_key(), - message_region_->get_address()); - - // send just the header region - a single message - hpx::util::yield_while( - [this]() { - int ret = - fi_send(this->endpoint_, this->region_list_[0].iov_base, - this->region_list_[0].iov_len, this->desc_[0], - this->dst_addr_, this); - - if (ret == -FI_EAGAIN) - { - LOG_ERROR_MSG("reposting fi_send...\n"); - return true; - } - else if (ret) - { - throw fabric_error(ret, "fi_sendv"); - } - - return false; - }, - "libfabric::sender::handle_error"); - } - } -} // namespace hpx::parcelset::policies::libfabric diff --git a/libs/full/parcelport_libfabric/tests/CMakeLists.txt b/libs/full/parcelport_libfabric/tests/CMakeLists.txt deleted file mode 100644 index 208222570eef..000000000000 --- a/libs/full/parcelport_libfabric/tests/CMakeLists.txt +++ /dev/null @@ -1,42 +0,0 @@ -# Copyright (c) 2020-2021 The STE||AR-Group -# -# SPDX-License-Identifier: BSL-1.0 -# Distributed under the Boost Software License, Version 1.0. (See accompanying -# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - -include(HPX_Message) - -if(HPX_WITH_TESTS) - if(HPX_WITH_TESTS_UNIT) - add_hpx_pseudo_target(tests.unit.modules.parcelport_libfabric) - add_hpx_pseudo_dependencies( - tests.unit.modules tests.unit.modules.parcelport_libfabric - ) - add_subdirectory(unit) - endif() - - if(HPX_WITH_TESTS_REGRESSIONS) - add_hpx_pseudo_target(tests.regressions.modules.parcelport_libfabric) - add_hpx_pseudo_dependencies( - tests.regressions.modules tests.regressions.modules.parcelport_libfabric - ) - add_subdirectory(regressions) - endif() - - if(HPX_WITH_TESTS_BENCHMARKS) - add_hpx_pseudo_target(tests.performance.modules.parcelport_libfabric) - add_hpx_pseudo_dependencies( - tests.performance.modules tests.performance.modules.parcelport_libfabric - ) - add_subdirectory(performance) - endif() - - if(HPX_WITH_TESTS_HEADERS) - add_hpx_header_tests( - modules.parcelport_libfabric - HEADERS ${parcelport_libfabric_headers} - HEADER_ROOT ${PROJECT_SOURCE_DIR}/include - DEPENDENCIES hpx_parcelport_libfabric - ) - endif() -endif() diff --git a/libs/full/parcelport_libfabric/tests/performance/CMakeLists.txt b/libs/full/parcelport_libfabric/tests/performance/CMakeLists.txt deleted file mode 100644 index 2f7420810a42..000000000000 --- a/libs/full/parcelport_libfabric/tests/performance/CMakeLists.txt +++ /dev/null @@ -1,5 +0,0 @@ -# Copyright (c) 2020-2021 The STE||AR-Group -# -# SPDX-License-Identifier: BSL-1.0 -# Distributed under the Boost Software License, Version 1.0. (See accompanying -# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) diff --git a/libs/full/parcelport_libfabric/tests/regressions/CMakeLists.txt b/libs/full/parcelport_libfabric/tests/regressions/CMakeLists.txt deleted file mode 100644 index 2f7420810a42..000000000000 --- a/libs/full/parcelport_libfabric/tests/regressions/CMakeLists.txt +++ /dev/null @@ -1,5 +0,0 @@ -# Copyright (c) 2020-2021 The STE||AR-Group -# -# SPDX-License-Identifier: BSL-1.0 -# Distributed under the Boost Software License, Version 1.0. (See accompanying -# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) diff --git a/libs/full/parcelport_libfabric/tests/unit/CMakeLists.txt b/libs/full/parcelport_libfabric/tests/unit/CMakeLists.txt deleted file mode 100644 index 2f7420810a42..000000000000 --- a/libs/full/parcelport_libfabric/tests/unit/CMakeLists.txt +++ /dev/null @@ -1,5 +0,0 @@ -# Copyright (c) 2020-2021 The STE||AR-Group -# -# SPDX-License-Identifier: BSL-1.0 -# Distributed under the Boost Software License, Version 1.0. (See accompanying -# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) diff --git a/libs/full/runtime_distributed/tests/unit/thread_mapper_parcel_pools.cpp b/libs/full/runtime_distributed/tests/unit/thread_mapper_parcel_pools.cpp index c207c0b73a37..435c875c7d84 100644 --- a/libs/full/runtime_distributed/tests/unit/thread_mapper_parcel_pools.cpp +++ b/libs/full/runtime_distributed/tests/unit/thread_mapper_parcel_pools.cpp @@ -38,7 +38,7 @@ void enumerate_threads(std::size_t num_custom_threads) #ifdef HPX_HAVE_NETWORKING std::size_t num_parcel_threads = 0; std::vector const parcelport_names = { - "tcp", "mpi", "lci", "libfabric"}; + "tcp", "mpi", "lci", "gasnet"}; for (auto parcelport_name : parcelport_names) { if (hpx::get_config_entry( diff --git a/tests/performance/network/network_storage/slurm-network-storage.sh.in b/tests/performance/network/network_storage/slurm-network-storage.sh.in index 7a0ea6d9008b..7e80723101d6 100755 --- a/tests/performance/network/network_storage/slurm-network-storage.sh.in +++ b/tests/performance/network/network_storage/slurm-network-storage.sh.in @@ -100,13 +100,11 @@ do QUEUE=normal fi -# for PARCELTYPE in "mpi" "libfabric" - for PARCELTYPE in "libfabric" + for PARCELTYPE in "mpi" "tcp" do TCP_ENABLE="-Ihpx.parcel.tcp.enable=0" MPI_ENABLE="-Ihpx.parcel.mpi.enable=0" - FAB_ENABLE="-Ihpx.parcel.libfabric.enable=0" BOOTSTRAP="-Ihpx.parcel.bootstrap=$PARCELTYPE" @@ -114,8 +112,6 @@ do TCP_ENABLE="-Ihpx.parcel.tcp.enable=1" elif [ "$PARCELTYPE" == "mpi" ]; then MPI_ENABLE="-Ihpx.parcel.mpi.enable=1" - elif [ "$PARCELTYPE" == "libfabric" ]; then - FAB_ENABLE="-Ihpx.parcel.libfabric.enable=1" fi HPX_ARGS="-Ihpx.parcel.message_handlers=0 --hpx:bind=balanced " @@ -126,7 +122,7 @@ do do LOCAL_SIZE=$(printf "%.0f" $( bc <<< "scale=6;(128 * $TRANSFERSIZE * $THREADS_PERTASK)/1024" )) LOCAL_SIZE=$(echo $((LOCAL_SIZE>1024?1024:LOCAL_SIZE))) - PROGRAM_PARAMS="${BOOTSTRAP} ${TCP_ENABLE} ${MPI_ENABLE} ${FAB_ENABLE} --hpx:threads=${THREADS_PERTASK} ${HPX_ARGS} --localMB=${LOCAL_SIZE} --transferKB=${TRANSFERSIZE} --parceltype=${PARCELTYPE} --distribution=0 --all-to-all=1 --no-local=1 --iterations=100" + PROGRAM_PARAMS="${BOOTSTRAP} ${TCP_ENABLE} ${MPI_ENABLE} --hpx:threads=${THREADS_PERTASK} ${HPX_ARGS} --localMB=${LOCAL_SIZE} --transferKB=${TRANSFERSIZE} --parceltype=${PARCELTYPE} --distribution=0 --all-to-all=1 --no-local=1 --iterations=100" write_script done done