Skip to content

Commit

Permalink
prov/efa: Support unsolicited RDMA write with immediate receive
Browse files Browse the repository at this point in the history
This patch onboards Libfabric with the rdma-core feature that
supports unsolicited RDMA write with immediate receive
(linux-rdma/rdma-core#1459).

When an RDMA write with immediate is unsolicited, libfabric
neither releases the rx pkt nor decrements the posted rx pkt counter.

Signed-off-by: Shi Jin <[email protected]>
  • Loading branch information
shijin-aws committed May 30, 2024
1 parent 7ae2c59 commit a082424
Show file tree
Hide file tree
Showing 6 changed files with 78 additions and 6 deletions.
9 changes: 9 additions & 0 deletions prov/efa/configure.m4
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ AC_DEFUN([FI_EFA_CONFIGURE],[
have_rdma_size=0
have_caps_rnr_retry=0
have_caps_rdma_write=0
have_caps_unsolicited_write_recv=0
have_ibv_is_fork_initialized=0
efa_support_data_in_order_aligned_128_byte=0
efadv_support_extended_cq=0
Expand All @@ -96,6 +97,11 @@ AC_DEFUN([FI_EFA_CONFIGURE],[
[have_caps_rdma_write=0],
[[#include <infiniband/efadv.h>]])
AC_CHECK_DECL(EFADV_DEVICE_ATTR_CAPS_UNSOLICITED_WRITE_RECV,
[have_caps_unsolicited_write_recv=1],
[have_caps_unsolicited_write_recv=0],
[[#include <infiniband/efadv.h>]])
AC_CHECK_DECL([ibv_is_fork_initialized],
[have_ibv_is_fork_initialized=1],
[have_ibv_is_fork_initialized=0],
Expand Down Expand Up @@ -164,6 +170,9 @@ AC_DEFUN([FI_EFA_CONFIGURE],[
AC_DEFINE_UNQUOTED([HAVE_CAPS_RDMA_WRITE],
[$have_caps_rdma_write],
[Indicates if EFADV_DEVICE_ATTR_CAPS_RDMA_WRITE is defined])
AC_DEFINE_UNQUOTED([HAVE_CAPS_UNSOLICITED_WRITE_RECV],
[$have_caps_unsolicited_write_recv],
[Indicates if EFADV_DEVICE_ATTR_CAPS_UNSOLICITED_WRITE_RECV is defined])
AC_DEFINE_UNQUOTED([HAVE_IBV_IS_FORK_INITIALIZED],
[$have_ibv_is_fork_initialized],
[Indicates if libibverbs has ibv_is_fork_initialized])
Expand Down
4 changes: 4 additions & 0 deletions prov/efa/src/efa_base_ep.c
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,10 @@ int efa_qp_create(struct efa_qp **qp, struct ibv_qp_init_attr_ex *init_attr_ex)
init_attr_ex);
} else {
assert(init_attr_ex->qp_type == IBV_QPT_DRIVER);
#if HAVE_CAPS_UNSOLICITED_WRITE_RECV
if (efa_device_support_unsolicited_write_recv())
efa_attr.flags |= EFADV_QP_FLAGS_UNSOLICITED_WRITE_RECV;
#endif
efa_attr.driver_qp_type = EFADV_QP_DRIVER_TYPE_SRD;
(*qp)->ibv_qp = efadv_create_qp_ex(
init_attr_ex->pd->context, init_attr_ex, &efa_attr,
Expand Down
5 changes: 5 additions & 0 deletions prov/efa/src/efa_cq.h
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,11 @@ static inline int efa_cq_ibv_cq_ex_open(struct fi_cq_attr *attr,
.wc_flags = EFADV_WC_EX_WITH_SGID,
};

#if HAVE_CAPS_UNSOLICITED_WRITE_RECV
if (efa_device_support_unsolicited_write_recv())
efadv_cq_init_attr.wc_flags |= EFADV_WC_EX_WITH_IS_UNSOLICITED;
#endif

*ibv_cq_ex = efadv_create_cq(ibv_ctx, &init_attr_ex,
&efadv_cq_init_attr,
sizeof(efadv_cq_init_attr));
Expand Down
20 changes: 20 additions & 0 deletions prov/efa/src/efa_device.c
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,26 @@ bool efa_device_support_rdma_write(void)
}
#endif

/**
 * @brief Report whether the EFA device advertises the unsolicited
 * write recv capability
 *
 * Only the first entry of the global device list is consulted. The
 * answer is always false when no device has been enumerated, or when
 * the build found no EFADV_DEVICE_ATTR_CAPS_UNSOLICITED_WRITE_RECV
 * declaration (HAVE_CAPS_UNSOLICITED_WRITE_RECV is 0).
 *
 * @return true if the capability bit is set on the first device,
 *         false otherwise
 */
bool efa_device_support_unsolicited_write_recv(void)
{
#if HAVE_CAPS_UNSOLICITED_WRITE_RECV
	const bool have_device = g_device_cnt > 0;

	/* Short-circuit keeps g_device_list[0] untouched when the list is empty. */
	return have_device &&
	       (g_device_list[0].device_caps &
		EFADV_DEVICE_ATTR_CAPS_UNSOLICITED_WRITE_RECV) != 0;
#else
	return false;
#endif
}

#ifndef _WIN32

static char *get_sysfs_path(void)
Expand Down
2 changes: 2 additions & 0 deletions prov/efa/src/efa_device.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ bool efa_device_support_rdma_read(void);

bool efa_device_support_rdma_write(void);

bool efa_device_support_unsolicited_write_recv(void);

int efa_device_get_driver(struct efa_device *efa_device,
char **efa_driver);

Expand Down
44 changes: 38 additions & 6 deletions prov/efa/src/rdm/efa_rdm_cq.c
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,30 @@ static struct fi_ops efa_rdm_cq_fi_ops = {
.ops_open = fi_no_ops_open,
};


#if HAVE_CAPS_UNSOLICITED_WRITE_RECV
/**
 * @brief Check whether a work completion is unsolicited, i.e. it did
 * not consume a posted recv buffer
 *
 * The CQ is created with EFADV_WC_EX_WITH_IS_UNSOLICITED only when
 * efa_device_support_unsolicited_write_recv() is true, so the
 * completion is queried only under that same condition.
 * NOTE(review): rdma-core is expected not to provide the query on a CQ
 * created without that wc flag -- confirm against the efadv docs.
 *
 * @param ibv_cq_ex extended ibv cq
 * @return true the wc is unsolicited and doesn't consume a recv buffer
 * @return false the wc is solicited and consumes a recv buffer
 */
static inline
bool efa_rdm_cq_wc_is_unsolicited(struct ibv_cq_ex *ibv_cq_ex)
{
	/* Without device support every completion is a solicited one. */
	if (!efa_device_support_unsolicited_write_recv())
		return false;

	return efadv_wc_is_unsolicited(efadv_cq_from_ibv_cq_ex(ibv_cq_ex));
}

#else

/**
 * @brief Fallback used when the build has no unsolicited write recv
 * support: every completion is treated as solicited
 *
 * @param ibv_cq_ex extended ibv cq (unused)
 * @return false always, the wc consumes a recv buffer
 */
static inline
bool efa_rdm_cq_wc_is_unsolicited(struct ibv_cq_ex *ibv_cq_ex)
{
	return false;
}

#endif
/**
* @brief handle rdma-core CQ completion resulted from IBV_WRITE_WITH_IMM
*
Expand Down Expand Up @@ -120,12 +144,20 @@ void efa_rdm_cq_proc_ibv_recv_rdma_with_imm_completion(

efa_cntr_report_rx_completion(&ep->base_ep.util_ep, flags);

/* Recv with immediate will consume a pkt_entry, but the pkt is not
filled, so free the pkt_entry and record we have one less posted
packet now. */
assert(pkt_entry);
ep->efa_rx_pkts_posted--;
efa_rdm_pke_release_rx(pkt_entry);
/**
* For unsolicited wc, pkt_entry can be NULL, so we can only
* access it for solicited wc.
*/
if (!efa_rdm_cq_wc_is_unsolicited(ibv_cq_ex)) {
/**
* Recv with immediate will consume a pkt_entry, but the pkt is not
* filled, so free the pkt_entry and record we have one less posted
* packet now.
*/
assert(pkt_entry);
ep->efa_rx_pkts_posted--;
efa_rdm_pke_release_rx(pkt_entry);
}
}

#if HAVE_EFADV_CQ_EX
Expand Down

0 comments on commit a082424

Please sign in to comment.