From 0616894d6051acbdb7c2bb3319f61f2023940855 Mon Sep 17 00:00:00 2001 From: Jiakun Yan Date: Sat, 9 Nov 2024 17:41:26 -0600 Subject: [PATCH] improve the setup of LCI_MAX_SINGLE_MESSAGE_SIZE for infiniband; also change its default to -1. --- CMakeLists.txt | 2 +- lci/api/lci_config.h.in | 2 +- lci/backend/ibv/server_ibv.c | 19 +++++++++---------- lci/backend/ofi/server_ofi.c | 2 +- lci/runtime/env.c | 2 +- lci/sys/lciu_misc.h | 12 ++++++++++++ 6 files changed, 25 insertions(+), 14 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 09b5dbd6..13b93234 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -207,7 +207,7 @@ if(NOT LCI_WITH_LCT_ONLY) set_property(CACHE LCI_RDV_PROTOCOL_DEFAULT PROPERTY STRINGS write writeimm) set(LCI_MAX_SINGLE_MESSAGE_SIZE_DEFAULT - 2000000000 + 0x7FFFFFFF CACHE STRING "Default single low-level message max size") mark_as_advanced( diff --git a/lci/api/lci_config.h.in b/lci/api/lci_config.h.in index ad3c6c23..e18e7e60 100644 --- a/lci/api/lci_config.h.in +++ b/lci/api/lci_config.h.in @@ -40,7 +40,7 @@ #cmakedefine01 LCI_IBV_ENABLE_TD_DEFAULT #cmakedefine01 LCI_ENABLE_PRG_NET_ENDPOINT_DEFAULT #define LCI_RDV_PROTOCOL_DEFAULT "@LCI_RDV_PROTOCOL_DEFAULT@" -#define LCI_MAX_SINGLE_MESSAGE_SIZE_DEFAULT @LCI_MAX_SINGLE_MESSAGE_SIZE_DEFAULT@ +#define LCI_MAX_SINGLE_MESSAGE_SIZE_DEFAULT (@LCI_MAX_SINGLE_MESSAGE_SIZE_DEFAULT@) #define LCI_CQ_MAX_POLL 16 #define LCI_SERVER_MAX_ENDPOINTS 8 diff --git a/lci/backend/ibv/server_ibv.c b/lci/backend/ibv/server_ibv.c index edf33680..969a75fe 100644 --- a/lci/backend/ibv/server_ibv.c +++ b/lci/backend/ibv/server_ibv.c @@ -76,16 +76,6 @@ void LCISI_event_polling_thread_fina(LCISI_server_t* server) void LCISD_server_init(LCIS_server_t* s) { - // Check configurations - if (LCI_MAX_SINGLE_MESSAGE_SIZE >= 2 << 31) { - // ibverbs' max message is 2GiB (or 2GB?) 
- LCI_MAX_SINGLE_MESSAGE_SIZE = 2 << 31 - 1; - LCI_Warn( - "Reduce LCI_MAX_SINGLE_MESSAGE_SIZE to %lu" - "as required by libibverbs max message size\n", - LCI_MAX_SINGLE_MESSAGE_SIZE); - } - LCISI_server_t* server = LCIU_malloc(sizeof(LCISI_server_t)); *s = (LCIS_server_t)server; @@ -205,6 +195,15 @@ void LCISD_server_init(LCIS_server_t* s) mtu_str(server->port_attr.max_mtu), mtu_str(server->port_attr.active_mtu)); + // Check max_msg_sz + if (LCI_MAX_SINGLE_MESSAGE_SIZE > server->port_attr.max_msg_sz) { + LCI_MAX_SINGLE_MESSAGE_SIZE = server->port_attr.max_msg_sz; + LCI_Log(LCI_LOG_INFO, "ibv", + "Reduce LCI_MAX_SINGLE_MESSAGE_SIZE to %lu " + "as required by libibverbs max message size\n", + LCI_MAX_SINGLE_MESSAGE_SIZE); + } + // query the gid server->gid_idx = LCI_IBV_GID_IDX; if (server->gid_idx < 0 && diff --git a/lci/backend/ofi/server_ofi.c b/lci/backend/ofi/server_ofi.c index eb7dfa89..ed35a69b 100644 --- a/lci/backend/ofi/server_ofi.c +++ b/lci/backend/ofi/server_ofi.c @@ -102,7 +102,7 @@ void LCISD_server_init(LCIS_server_t* s) if (server->info->ep_attr->max_msg_size < LCI_MAX_SINGLE_MESSAGE_SIZE) { LCI_MAX_SINGLE_MESSAGE_SIZE = server->info->ep_attr->max_msg_size; LCI_Warn( - "Reduce LCI_MAX_SINGLE_MESSAGE_SIZE to %lu" + "Reduce LCI_MAX_SINGLE_MESSAGE_SIZE to %lu " "as required by the libfabric max_msg_size attribute\n", LCI_MAX_SINGLE_MESSAGE_SIZE); } diff --git a/lci/runtime/env.c b/lci/runtime/env.c index 992fa037..fbe59aa4 100644 --- a/lci/runtime/env.c +++ b/lci/runtime/env.c @@ -101,7 +101,7 @@ void LCII_env_init(int num_proc, int rank) (LCI_PACKET_SIZE - sizeof(struct LCII_packet_context) - sizeof(struct LCII_packet_rtr_t)) / sizeof(struct LCII_packet_rtr_rbuffer_info_t)); - LCI_MAX_SINGLE_MESSAGE_SIZE = LCIU_getenv_or( + LCI_MAX_SINGLE_MESSAGE_SIZE = LCIU_getenv_or_ul( "LCI_MAX_SINGLE_MESSAGE_SIZE", LCI_MAX_SINGLE_MESSAGE_SIZE_DEFAULT); LCI_OFI_CXI_TRY_NO_HACK = LCIU_getenv_or("LCI_OFI_CXI_TRY_NO_HACK", false); { diff --git a/lci/sys/lciu_misc.h 
b/lci/sys/lciu_misc.h
index 1a6ad726..0f251997 100644
--- a/lci/sys/lciu_misc.h
+++ b/lci/sys/lciu_misc.h
@@ -104,6 +104,26 @@ static inline int LCIU_getenv_or(char* env, int def)
   LCI_Log(LCI_LOG_INFO, "env", "set %s to be %d\n", env, ret);
   return ret;
 }
+// Read an unsigned size from the environment variable `env`.
+// Returns `def` when the variable is unset. When it is set, the value must
+// parse with sscanf("%zu"); otherwise LCI_Assert fires. The selected value is
+// logged at INFO level under the "env" tag.
+static inline size_t LCIU_getenv_or_ul(char* env, size_t def)
+{
+  size_t ret = def;
+  char* val = getenv(env);
+  if (val != NULL) {
+    // Parse outside the assertion so the conversion does not depend on the
+    // assertion expression being evaluated.
+    int nparsed = sscanf(val, "%zu", &ret);
+    // `val` is a char*, so it has to be formatted with %s.
+    LCI_Assert(nparsed == 1, "Unknown value: %s\n", val);
+  }
+  // size_t is printed with %zu; %lu is only correct where size_t happens to
+  // be unsigned long.
+  LCI_Log(LCI_LOG_INFO, "env", "set %s to be %zu\n", env, ret);
+  return ret;
+}
 
 static inline void LCIU_spin_for_nsec(double t)
 {