From 687cf6e66837b7496b55b9f12262a6f9c2b227db Mon Sep 17 00:00:00 2001 From: Jiakun Yan Date: Fri, 21 Jun 2024 12:58:53 -0700 Subject: [PATCH] force ofi cxi to disable LCI_ENABLE_PRG_NET_ENDPOINT --- lci/api/lci.h | 7 ------- lci/backend/ofi/server_ofi.c | 24 ++++------------------- lci/backend/ofi/server_ofi.h | 38 +----------------------------------- 3 files changed, 5 insertions(+), 64 deletions(-) diff --git a/lci/api/lci.h b/lci/api/lci.h index 1744e984..5f9ab289 100644 --- a/lci/api/lci.h +++ b/lci/api/lci.h @@ -592,13 +592,6 @@ typedef enum { } LCI_rdv_protocol_t; extern LCI_rdv_protocol_t LCI_RDV_PROTOCOL; -/** - * @ingroup LCI_COMM - * @brief For the libfabric cxi provider, Try turning off the hacking to see - * whether cxi has fixed the double mr_bind error. - */ -extern bool LCI_OFI_CXI_TRY_NO_HACK; - /** * @ingroup LCI_COMM * @brief For the UCX backend, use try_lock to wrap the ucx function calls. diff --git a/lci/backend/ofi/server_ofi.c b/lci/backend/ofi/server_ofi.c index dd69a4d7..258505c2 100644 --- a/lci/backend/ofi/server_ofi.c +++ b/lci/backend/ofi/server_ofi.c @@ -103,6 +103,10 @@ void LCISD_server_init(LCIS_server_t* s) LCI_Assert(LCI_USE_DREG == 0, "The registration cache should be turned off " "for libfabric cxi backend. Use `export LCI_USE_DREG=0`.\n"); + LCI_Assert(LCI_ENABLE_PRG_NET_ENDPOINT == 0, + "The progress-specific network endpoint should be " + "turned off for libfabric cxi backend. Use `export " + "LCI_ENABLE_PRG_NET_ENDPOINT=0`.\n"); if (LCI_RDV_PROTOCOL != LCI_RDV_WRITE) { LCI_RDV_PROTOCOL = LCI_RDV_WRITE; LCI_Warn( @@ -116,15 +120,11 @@ void LCISD_server_init(LCIS_server_t* s) // Create domain.
FI_SAFECALL(fi_domain(server->fabric, server->info, &server->domain, NULL)); - - server->endpoint_count = 0; } void LCISD_server_fina(LCIS_server_t s) { LCISI_server_t* server = (LCISI_server_t*)s; - LCI_Assert(server->endpoint_count == 0, "Endpoint count is not zero (%d)\n", - server->endpoint_count); FI_SAFECALL(fi_close((struct fid*)&server->domain->fid)); FI_SAFECALL(fi_close((struct fid*)&server->fabric->fid)); fi_freeinfo(server->info); @@ -138,19 +138,7 @@ void LCISD_endpoint_init(LCIS_server_t server_pp, LCIS_endpoint_t* endpoint_pp, LCISI_endpoint_t* endpoint_p = LCIU_malloc(sizeof(LCISI_endpoint_t)); *endpoint_pp = (LCIS_endpoint_t)endpoint_p; endpoint_p->server = (LCISI_server_t*)server_pp; - endpoint_p->server->endpoints[endpoint_p->server->endpoint_count++] = - endpoint_p; endpoint_p->is_single_threaded = single_threaded; - if (!LCI_OFI_CXI_TRY_NO_HACK && - strcmp(endpoint_p->server->info->fabric_attr->prov_name, "cxi") == 0 && - endpoint_p->server->info->domain_attr->mr_mode & FI_MR_ENDPOINT && - endpoint_p->server->endpoint_count > 1) { - // We are using more than one endpoint per server, but the cxi provider - // can only bind mr to one endpoint. We have to guess here. 
- endpoint_p->server->cxi_mr_bind_hack = true; - } else { - endpoint_p->server->cxi_mr_bind_hack = false; - } // Create end-point; endpoint_p->server->info->tx_attr->size = LCI_SERVER_MAX_SENDS; endpoint_p->server->info->rx_attr->size = LCI_SERVER_MAX_RECVS; @@ -223,10 +211,6 @@ void LCISD_endpoint_fina(LCIS_endpoint_t endpoint_pp) LCT_pmi_barrier(); LCISI_endpoint_t* endpoint_p = (LCISI_endpoint_t*)endpoint_pp; LCIU_free(endpoint_p->peer_addrs); - int my_idx = --endpoint_p->server->endpoint_count; - LCI_Assert(endpoint_p->server->endpoints[my_idx] == endpoint_p, - "This is not me!\n"); - endpoint_p->server->endpoints[my_idx] = NULL; FI_SAFECALL(fi_close((struct fid*)&endpoint_p->ep->fid)); FI_SAFECALL(fi_close((struct fid*)&endpoint_p->cq->fid)); FI_SAFECALL(fi_close((struct fid*)&endpoint_p->av->fid)); diff --git a/lci/backend/ofi/server_ofi.h b/lci/backend/ofi/server_ofi.h index 78fdabcd..1ae28f73 100644 --- a/lci/backend/ofi/server_ofi.h +++ b/lci/backend/ofi/server_ofi.h @@ -39,9 +39,6 @@ typedef struct __attribute__((aligned(LCI_CACHE_LINE))) LCISI_server_t { struct fi_info* info; struct fid_fabric* fabric; struct fid_domain* domain; - struct LCISI_endpoint_t* endpoints[LCI_SERVER_MAX_ENDPOINTS]; - int endpoint_count; - bool cxi_mr_bind_hack; } LCISI_server_t; typedef struct __attribute__((aligned(LCI_CACHE_LINE))) LCISI_endpoint_t { @@ -72,16 +69,7 @@ static inline void* LCISI_real_server_reg(LCIS_endpoint_t endpoint_pp, FI_READ | FI_WRITE | FI_REMOTE_WRITE, 0, rdma_key, 0, &mr, 0)); if (server->info->domain_attr->mr_mode & FI_MR_ENDPOINT) { - LCI_DBG_Assert(server->endpoint_count >= 1, "No endpoints available!\n"); - if (server->cxi_mr_bind_hack) { - // A temporary fix for the cxi provider, currently cxi cannot bind a - // memory region to more than one endpoint. 
- FI_SAFECALL(fi_mr_bind(mr, &endpoint_p->ep->fid, 0)); - } else { - for (int i = 0; i < server->endpoint_count; ++i) { - FI_SAFECALL(fi_mr_bind(mr, &server->endpoints[i]->ep->fid, 0)); - } - } + FI_SAFECALL(fi_mr_bind(mr, &endpoint_p->ep->fid, 0)); FI_SAFECALL(fi_mr_enable(mr)); } return (void*)mr; @@ -240,12 +228,6 @@ static inline LCI_error_t LCISD_post_puts(LCIS_endpoint_t endpoint_pp, int rank, LCIS_rkey_t rkey) { LCISI_endpoint_t* endpoint_p = (LCISI_endpoint_t*)endpoint_pp; - LCI_Assert( - !endpoint_p->server->cxi_mr_bind_hack || - endpoint_p == endpoint_p->server - ->endpoints[endpoint_p->server->endpoint_count - 1], - "We are using cxi mr_bind hacking mode but unexpected endpoint is " - "performing remote put. Try `export LCI_ENABLE_PRG_NET_ENDPOINT=0`.\n"); uintptr_t addr; if (endpoint_p->server->info->domain_attr->mr_mode & FI_MR_VIRT_ADDR || endpoint_p->server->info->domain_attr->mr_mode & FI_MR_BASIC) { @@ -292,12 +274,6 @@ static inline LCI_error_t LCISD_post_put(LCIS_endpoint_t endpoint_pp, int rank, LCIS_rkey_t rkey, void* ctx) { LCISI_endpoint_t* endpoint_p = (LCISI_endpoint_t*)endpoint_pp; - LCI_Assert( - !endpoint_p->server->cxi_mr_bind_hack || - endpoint_p == endpoint_p->server - ->endpoints[endpoint_p->server->endpoint_count - 1], - "We are using cxi mr_bind hacking mode but an unexpected endpoint is " - "performing remote put. 
Try `export LCI_ENABLE_PRG_NET_ENDPOINT=0`.\n"); uintptr_t addr; if (endpoint_p->server->info->domain_attr->mr_mode & FI_MR_VIRT_ADDR || endpoint_p->server->info->domain_attr->mr_mode & FI_MR_BASIC) { @@ -345,12 +321,6 @@ static inline LCI_error_t LCISD_post_putImms(LCIS_endpoint_t endpoint_pp, LCIS_rkey_t rkey, uint32_t meta) { LCISI_endpoint_t* endpoint_p = (LCISI_endpoint_t*)endpoint_pp; - LCI_Assert( - !endpoint_p->server->cxi_mr_bind_hack || - endpoint_p == endpoint_p->server - ->endpoints[endpoint_p->server->endpoint_count - 1], - "We are using cxi mr_bind hacking mode but an unexpected endpoint is " - "performing remote put. Try `export LCI_ENABLE_PRG_NET_ENDPOINT=0`.\n"); uintptr_t addr; if (endpoint_p->server->info->domain_attr->mr_mode & FI_MR_VIRT_ADDR || endpoint_p->server->info->domain_attr->mr_mode & FI_MR_BASIC) { @@ -381,12 +351,6 @@ static inline LCI_error_t LCISD_post_putImm(LCIS_endpoint_t endpoint_pp, void* ctx) { LCISI_endpoint_t* endpoint_p = (LCISI_endpoint_t*)endpoint_pp; - LCI_Assert( - !endpoint_p->server->cxi_mr_bind_hack || - endpoint_p == endpoint_p->server - ->endpoints[endpoint_p->server->endpoint_count - 1], - "We are using cxi mr_bind hacking mode but an unexpected endpoint is " - "performing remote put. Try `export LCI_ENABLE_PRG_NET_ENDPOINT=0`.\n"); uintptr_t addr; if (endpoint_p->server->info->domain_attr->mr_mode & FI_MR_VIRT_ADDR || endpoint_p->server->info->domain_attr->mr_mode & FI_MR_BASIC) {