From a082424f9cd11faf71be009d6eb8f029ef67dd46 Mon Sep 17 00:00:00 2001 From: Shi Jin Date: Mon, 13 May 2024 21:20:40 +0000 Subject: [PATCH] prov/efa: Support unsolicited RDMA write with immediate receive This patch onboards Libfabric with the rdma-core feature that supports unsolicited RDMA write with immediate receive https://github.com/linux-rdma/rdma-core/pull/1459 . When an RDMA write with imm is unsolicited, libfabric neither releases an rx pkt nor decrements the posted rx pkt counter. Signed-off-by: Shi Jin --- prov/efa/configure.m4 | 9 +++++++ prov/efa/src/efa_base_ep.c | 4 ++++ prov/efa/src/efa_cq.h | 5 ++++ prov/efa/src/efa_device.c | 20 ++++++++++++++++ prov/efa/src/efa_device.h | 2 ++ prov/efa/src/rdm/efa_rdm_cq.c | 44 ++++++++++++++++++++++++++++++----- 6 files changed, 78 insertions(+), 6 deletions(-) diff --git a/prov/efa/configure.m4 b/prov/efa/configure.m4 index 619e9d61c35..f807ce9bc51 100644 --- a/prov/efa/configure.m4 +++ b/prov/efa/configure.m4 @@ -71,6 +71,7 @@ AC_DEFUN([FI_EFA_CONFIGURE],[ have_rdma_size=0 have_caps_rnr_retry=0 have_caps_rdma_write=0 + have_caps_unsolicited_write_recv=0 have_ibv_is_fork_initialized=0 efa_support_data_in_order_aligned_128_byte=0 efadv_support_extended_cq=0 @@ -96,6 +97,11 @@ AC_DEFUN([FI_EFA_CONFIGURE],[ [have_caps_rdma_write=0], [[#include <infiniband/efadv.h>]]) + AC_CHECK_DECL(EFADV_DEVICE_ATTR_CAPS_UNSOLICITED_WRITE_RECV, + [have_caps_unsolicited_write_recv=1], + [have_caps_unsolicited_write_recv=0], + [[#include <infiniband/efadv.h>]]) + AC_CHECK_DECL([ibv_is_fork_initialized], [have_ibv_is_fork_initialized=1], [have_ibv_is_fork_initialized=0], @@ -164,6 +170,9 @@ AC_DEFUN([FI_EFA_CONFIGURE],[ AC_DEFINE_UNQUOTED([HAVE_CAPS_RDMA_WRITE], [$have_caps_rdma_write], [Indicates if EFADV_DEVICE_ATTR_CAPS_RDMA_WRITE is defined]) + AC_DEFINE_UNQUOTED([HAVE_CAPS_UNSOLICITED_WRITE_RECV], + [$have_caps_unsolicited_write_recv], + [Indicates if EFADV_DEVICE_ATTR_CAPS_UNSOLICITED_WRITE_RECV is defined]) AC_DEFINE_UNQUOTED([HAVE_IBV_IS_FORK_INITIALIZED], 
[$have_ibv_is_fork_initialized], [Indicates if libibverbs has ibv_is_fork_initialized]) diff --git a/prov/efa/src/efa_base_ep.c b/prov/efa/src/efa_base_ep.c index aaf7fdf55a1..f05acee0564 100644 --- a/prov/efa/src/efa_base_ep.c +++ b/prov/efa/src/efa_base_ep.c @@ -177,6 +177,10 @@ int efa_qp_create(struct efa_qp **qp, struct ibv_qp_init_attr_ex *init_attr_ex) init_attr_ex); } else { assert(init_attr_ex->qp_type == IBV_QPT_DRIVER); +#if HAVE_CAPS_UNSOLICITED_WRITE_RECV + if (efa_device_support_unsolicited_write_recv()) + efa_attr.flags |= EFADV_QP_FLAGS_UNSOLICITED_WRITE_RECV; +#endif efa_attr.driver_qp_type = EFADV_QP_DRIVER_TYPE_SRD; (*qp)->ibv_qp = efadv_create_qp_ex( init_attr_ex->pd->context, init_attr_ex, &efa_attr, diff --git a/prov/efa/src/efa_cq.h b/prov/efa/src/efa_cq.h index e616741d454..e3486f43f46 100644 --- a/prov/efa/src/efa_cq.h +++ b/prov/efa/src/efa_cq.h @@ -130,6 +130,11 @@ static inline int efa_cq_ibv_cq_ex_open(struct fi_cq_attr *attr, .wc_flags = EFADV_WC_EX_WITH_SGID, }; +#if HAVE_CAPS_UNSOLICITED_WRITE_RECV + if (efa_device_support_unsolicited_write_recv()) + efadv_cq_init_attr.wc_flags |= EFADV_WC_EX_WITH_IS_UNSOLICITED; +#endif + *ibv_cq_ex = efadv_create_cq(ibv_ctx, &init_attr_ex, &efadv_cq_init_attr, sizeof(efadv_cq_init_attr)); diff --git a/prov/efa/src/efa_device.c b/prov/efa/src/efa_device.c index 709948909fd..bfe83ac8d39 100644 --- a/prov/efa/src/efa_device.c +++ b/prov/efa/src/efa_device.c @@ -268,6 +268,26 @@ bool efa_device_support_rdma_write(void) } #endif +/** + * @brief check whether efa device support unsolicited write recv + * + * @return a boolean indicating unsolicited write recv + */ +#if HAVE_CAPS_UNSOLICITED_WRITE_RECV +bool efa_device_support_unsolicited_write_recv(void) +{ + if (g_device_cnt <=0) + return false; + + return g_device_list[0].device_caps & EFADV_DEVICE_ATTR_CAPS_UNSOLICITED_WRITE_RECV; +} +#else +bool efa_device_support_unsolicited_write_recv(void) +{ + return false; +} +#endif + #ifndef _WIN32 static char 
*get_sysfs_path(void) diff --git a/prov/efa/src/efa_device.h b/prov/efa/src/efa_device.h index 061b38d9775..302510a607f 100644 --- a/prov/efa/src/efa_device.h +++ b/prov/efa/src/efa_device.h @@ -35,6 +35,8 @@ bool efa_device_support_rdma_read(void); bool efa_device_support_rdma_write(void); +bool efa_device_support_unsolicited_write_recv(void); + int efa_device_get_driver(struct efa_device *efa_device, char **efa_driver); diff --git a/prov/efa/src/rdm/efa_rdm_cq.c b/prov/efa/src/rdm/efa_rdm_cq.c index fca5eae3f05..93cf8d0f26b 100644 --- a/prov/efa/src/rdm/efa_rdm_cq.c +++ b/prov/efa/src/rdm/efa_rdm_cq.c @@ -71,6 +71,30 @@ static struct fi_ops efa_rdm_cq_fi_ops = { .ops_open = fi_no_ops_open, }; + +#if HAVE_CAPS_UNSOLICITED_WRITE_RECV +/** + * @brief Check whether a completion is unsolicited, i.e. does not consume a recv buffer + * + * @param ibv_cq_ex extended ibv cq + * @return true the wc is unsolicited and doesn't consume a recv buffer + * @return false the wc is solicited and consumes a recv buffer + */ +static inline +bool efa_rdm_cq_wc_is_unsolicited(struct ibv_cq_ex *ibv_cq_ex) +{ + return efadv_wc_is_unsolicited(efadv_cq_from_ibv_cq_ex(ibv_cq_ex)); +} + +#else + +static inline +bool efa_rdm_cq_wc_is_unsolicited(struct ibv_cq_ex *ibv_cq_ex) +{ + return false; +} + +#endif /** * @brief handle rdma-core CQ completion resulted from IBV_WRITE_WITH_IMM * @@ -120,12 +144,20 @@ void efa_rdm_cq_proc_ibv_recv_rdma_with_imm_completion( efa_cntr_report_rx_completion(&ep->base_ep.util_ep, flags); - /* Recv with immediate will consume a pkt_entry, but the pkt is not - filled, so free the pkt_entry and record we have one less posted - packet now. */ - assert(pkt_entry); - ep->efa_rx_pkts_posted--; - efa_rdm_pke_release_rx(pkt_entry); + /** + * For unsolicited wc, pkt_entry can be NULL, so we can only + * access it for solicited wc. 
+ */ + if (!efa_rdm_cq_wc_is_unsolicited(ibv_cq_ex)) { + /** + * Recv with immediate will consume a pkt_entry, but the pkt is not + * filled, so free the pkt_entry and record we have one less posted + * packet now. + */ + assert(pkt_entry); + ep->efa_rx_pkts_posted--; + efa_rdm_pke_release_rx(pkt_entry); + } } #if HAVE_EFADV_CQ_EX