Skip to content

Commit

Permalink
prov/efa: Fix the error handling for unsolicited recv
Browse files Browse the repository at this point in the history
When getting an wc error for rdma with imm + unsolicited
recv, there is no wr_id / pkt_entry associated. Libfabric
should write the eq error directly.

Also added unit tests to cover both solicited and unsolicited
recv error path.

Signed-off-by: Shi Jin <[email protected]>
  • Loading branch information
shijin-aws committed Nov 13, 2024
1 parent 39c2f97 commit 4c2c5a6
Show file tree
Hide file tree
Showing 7 changed files with 130 additions and 1 deletion.
3 changes: 2 additions & 1 deletion prov/efa/Makefile.include
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,8 @@ prov_efa_test_efa_unit_test_LDFLAGS = $(cmocka_rpath) $(efa_LDFLAGS) $(cmocka_LD
-Wl,--wrap=efadv_query_device \
-Wl,--wrap=ofi_cudaMalloc \
-Wl,--wrap=ofi_copy_from_hmem_iov \
-Wl,--wrap=efa_rdm_pke_read
-Wl,--wrap=efa_rdm_pke_read \
-Wl,--wrap=efa_device_support_unsolicited_write_recv

if HAVE_EFADV_CQ_EX
prov_efa_test_efa_unit_test_LDFLAGS += -Wl,--wrap=efadv_create_cq
Expand Down
6 changes: 6 additions & 0 deletions prov/efa/src/rdm/efa_rdm_cq.c
Original file line number Diff line number Diff line change
Expand Up @@ -487,6 +487,12 @@ void efa_rdm_cq_poll_ibv_cq(ssize_t cqe_to_process, struct efa_ibv_cq *ibv_cq)
break;
case IBV_WC_RECV: /* fall through */
case IBV_WC_RECV_RDMA_WITH_IMM:
if (efa_rdm_cq_wc_is_unsolicited(ibv_cq->ibv_cq_ex)) {
EFA_WARN(FI_LOG_CQ, "Receive error %s (%d) for unsolicited write recv",
efa_strerror(prov_errno), prov_errno);
efa_base_ep_write_eq_error(&ep->base_ep, to_fi_errno(prov_errno), prov_errno);
break;
}
efa_rdm_pke_handle_rx_error(pkt_entry, prov_errno);
break;
default:
Expand Down
93 changes: 93 additions & 0 deletions prov/efa/test/efa_unit_test_cq.c
Original file line number Diff line number Diff line change
Expand Up @@ -345,6 +345,99 @@ void test_ibv_cq_ex_read_bad_recv_status(struct efa_resource **state)
assert_int_equal(eq_err_entry.prov_errno, EFA_IO_COMP_STATUS_LOCAL_ERROR_UNRESP_REMOTE);
}

/**
* @brief verify that fi_cq_read/fi_eq_read works properly when rdma-core return bad status for
* recv rdma with imm.
*
* When getting a wc error of op code IBV_WC_RECV_RDMA_WITH_IMM, libfabric cannot find the
* corresponding application operation to write a cq error.
* It will write an EQ error instead.
*
* @param[in] state struct efa_resource that is managed by the framework
* @param[in] use_unsolicited_recv whether to use unsolicited write recv
*/
void test_ibv_cq_ex_read_bad_recv_rdma_with_imm_status_impl(struct efa_resource **state, bool use_unsolicited_recv)
{
struct efa_rdm_ep *efa_rdm_ep;
struct efa_resource *resource = *state;
struct fi_cq_data_entry cq_entry;
struct fi_eq_err_entry eq_err_entry;
int ret;
struct efa_rdm_cq *efa_rdm_cq;


efa_unit_test_resource_construct(resource, FI_EP_RDM);
efa_rdm_ep = container_of(resource->ep, struct efa_rdm_ep, base_ep.util_ep.ep_fid);

efa_rdm_cq = container_of(resource->cq, struct efa_rdm_cq, util_cq.cq_fid.fid);

efa_rdm_cq->ibv_cq.ibv_cq_ex->start_poll = &efa_mock_ibv_start_poll_return_mock;
efa_rdm_cq->ibv_cq.ibv_cq_ex->end_poll = &efa_mock_ibv_end_poll_check_mock;
efa_rdm_cq->ibv_cq.ibv_cq_ex->read_opcode = &efa_mock_ibv_read_opcode_return_mock;
efa_rdm_cq->ibv_cq.ibv_cq_ex->read_vendor_err = &efa_mock_ibv_read_vendor_err_return_mock;
efa_rdm_cq->ibv_cq.ibv_cq_ex->read_qp_num = &efa_mock_ibv_read_qp_num_return_mock;

will_return(efa_mock_ibv_start_poll_return_mock, 0);
will_return(efa_mock_ibv_end_poll_check_mock, NULL);
/* efa_mock_ibv_read_opcode_return_mock() will be called once in release mode,
* but will be called twice in debug mode. because there is an assertion that called ibv_read_opcode(),
* therefore use will_return_always()
*/
will_return_always(efa_mock_ibv_read_opcode_return_mock, IBV_WC_RECV_RDMA_WITH_IMM);
will_return_always(efa_mock_ibv_read_qp_num_return_mock, efa_rdm_ep->base_ep.qp->qp_num);
will_return(efa_mock_ibv_read_vendor_err_return_mock, EFA_IO_COMP_STATUS_FLUSHED);

g_efa_unit_test_mocks.efa_device_support_unsolicited_write_recv = &efa_mock_efa_device_support_unsolicited_write_recv;

#if HAVE_CAPS_UNSOLICITED_WRITE_RECV
if (use_unsolicited_recv) {
efadv_cq_from_ibv_cq_ex(efa_rdm_cq->ibv_cq.ibv_cq_ex)->wc_is_unsolicited = &efa_mock_efadv_wc_is_unsolicited;
will_return(efa_mock_efa_device_support_unsolicited_write_recv, true);
will_return(efa_mock_efadv_wc_is_unsolicited, true);
efa_rdm_cq->ibv_cq.ibv_cq_ex->wr_id = 0;
} else {
/*
* For solicited write recv, it will consume an internal rx pkt
*/
will_return(efa_mock_efa_device_support_unsolicited_write_recv, false);
struct efa_rdm_pke *pkt_entry = efa_rdm_pke_alloc(efa_rdm_ep, efa_rdm_ep->efa_rx_pkt_pool, EFA_RDM_PKE_FROM_EFA_RX_POOL);
assert_non_null(pkt_entry);
efa_rdm_ep->efa_rx_pkts_posted = efa_rdm_ep_get_rx_pool_size(efa_rdm_ep);
efa_rdm_cq->ibv_cq.ibv_cq_ex->wr_id = (uintptr_t)pkt_entry;
}
#else
/*
* Always test with solicited recv
*/
will_return(efa_mock_efa_device_support_unsolicited_write_recv, false);
struct efa_rdm_pke *pkt_entry = efa_rdm_pke_alloc(efa_rdm_ep, efa_rdm_ep->efa_rx_pkt_pool, EFA_RDM_PKE_FROM_EFA_RX_POOL);
assert_non_null(pkt_entry);
efa_rdm_ep->efa_rx_pkts_posted = efa_rdm_ep_get_rx_pool_size(efa_rdm_ep);
efa_rdm_cq->ibv_cq.ibv_cq_ex->wr_id = (uintptr_t)pkt_entry;
#endif
/* the recv rdma with imm will not populate to application cq because it's an EFA internal error and
* and not related to any application operations. Currently we can only read the error from eq.
*/
efa_rdm_cq->ibv_cq.ibv_cq_ex->status = IBV_WC_GENERAL_ERR;
ret = fi_cq_read(resource->cq, &cq_entry, 1);
assert_int_equal(ret, -FI_EAGAIN);

ret = fi_eq_readerr(resource->eq, &eq_err_entry, 0);
assert_int_equal(ret, sizeof(eq_err_entry));
assert_int_not_equal(eq_err_entry.err, FI_SUCCESS);
assert_int_equal(eq_err_entry.prov_errno, EFA_IO_COMP_STATUS_FLUSHED);
}

void test_ibv_cq_ex_read_bad_recv_rdma_with_imm_status_use_unsolicited_recv(struct efa_resource **state)
{
test_ibv_cq_ex_read_bad_recv_rdma_with_imm_status_impl(state, true);
}

void test_ibv_cq_ex_read_bad_recv_rdma_with_imm_status_use_solicited_recv(struct efa_resource **state)
{
test_ibv_cq_ex_read_bad_recv_rdma_with_imm_status_impl(state, false);
}

/**
* @brief verify that fi_cq_read/fi_cq_readerr works properly when ibv_start_poll failed.
*
Expand Down
16 changes: 16 additions & 0 deletions prov/efa/test/efa_unit_test_mocks.c
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,11 @@ uint32_t efa_mock_ibv_read_wc_flags_return_mock(struct ibv_cq_ex *current)
return mock();
}

bool efa_mock_efadv_wc_is_unsolicited(struct efadv_cq *efadv_cq)
{
return mock();
}

int g_ofi_copy_from_hmem_iov_call_counter;
ssize_t efa_mock_ofi_copy_from_hmem_iov_inc_counter(void *dest, size_t size,
enum fi_hmem_iface hmem_iface, uint64_t device,
Expand All @@ -197,6 +202,11 @@ int efa_mock_efa_rdm_pke_read_return_mock(struct efa_rdm_ope *ope)
return mock();
}

bool efa_mock_efa_device_support_unsolicited_write_recv()
{
return mock();
}

struct efa_unit_test_mocks g_efa_unit_test_mocks = {
.local_host_id = 0,
.peer_host_id = 0,
Expand All @@ -213,6 +223,7 @@ struct efa_unit_test_mocks g_efa_unit_test_mocks = {
#endif
.ofi_copy_from_hmem_iov = __real_ofi_copy_from_hmem_iov,
.efa_rdm_pke_read = __real_efa_rdm_pke_read,
.efa_device_support_unsolicited_write_recv = __real_efa_device_support_unsolicited_write_recv,
.ibv_is_fork_initialized = __real_ibv_is_fork_initialized,
#if HAVE_EFADV_QUERY_MR
.efadv_query_mr = __real_efadv_query_mr,
Expand Down Expand Up @@ -347,6 +358,11 @@ int __wrap_efa_rdm_pke_read(struct efa_rdm_ope *ope)
return g_efa_unit_test_mocks.efa_rdm_pke_read(ope);
}

bool __wrap_efa_device_support_unsolicited_write_recv(void)
{
return g_efa_unit_test_mocks.efa_device_support_unsolicited_write_recv();
}

enum ibv_fork_status __wrap_ibv_is_fork_initialized(void)
{
return g_efa_unit_test_mocks.ibv_is_fork_initialized();
Expand Down
8 changes: 8 additions & 0 deletions prov/efa/test/efa_unit_test_mocks.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,8 @@ uint32_t efa_mock_ibv_read_qp_num_return_mock(struct ibv_cq_ex *current);

uint32_t efa_mock_ibv_read_wc_flags_return_mock(struct ibv_cq_ex *current);

bool efa_mock_efadv_wc_is_unsolicited(struct efadv_cq *efadv_cq);

ssize_t __real_ofi_copy_from_hmem_iov(void *dest, size_t size,
enum fi_hmem_iface hmem_iface, uint64_t device,
const struct iovec *hmem_iov,
Expand All @@ -85,8 +87,12 @@ ssize_t efa_mock_ofi_copy_from_hmem_iov_inc_counter(void *dest, size_t size,

int __real_efa_rdm_pke_read(struct efa_rdm_ope *ope);

bool __real_efa_device_support_unsolicited_write_recv();

int efa_mock_efa_rdm_pke_read_return_mock(struct efa_rdm_ope *ope);

bool efa_mock_efa_device_support_unsolicited_write_recv(void);

struct efa_unit_test_mocks
{
uint64_t local_host_id;
Expand Down Expand Up @@ -118,6 +124,8 @@ struct efa_unit_test_mocks

int (*efa_rdm_pke_read)(struct efa_rdm_ope *ope);

bool (*efa_device_support_unsolicited_write_recv)(void);

enum ibv_fork_status (*ibv_is_fork_initialized)(void);

#if HAVE_EFADV_QUERY_MR
Expand Down
3 changes: 3 additions & 0 deletions prov/efa/test/efa_unit_tests.c
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ static int efa_unit_test_mocks_teardown(void **state)
#endif
.ofi_copy_from_hmem_iov = __real_ofi_copy_from_hmem_iov,
.efa_rdm_pke_read = __real_efa_rdm_pke_read,
.efa_device_support_unsolicited_write_recv = __real_efa_device_support_unsolicited_write_recv,
.ibv_is_fork_initialized = __real_ibv_is_fork_initialized,
};

Expand Down Expand Up @@ -122,6 +123,8 @@ int main(void)
cmocka_unit_test_setup_teardown(test_rdm_cq_read_bad_send_status_invalid_qpn, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
cmocka_unit_test_setup_teardown(test_rdm_cq_read_bad_send_status_message_too_long, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
cmocka_unit_test_setup_teardown(test_ibv_cq_ex_read_bad_recv_status, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
cmocka_unit_test_setup_teardown(test_ibv_cq_ex_read_bad_recv_rdma_with_imm_status_use_unsolicited_recv, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
cmocka_unit_test_setup_teardown(test_ibv_cq_ex_read_bad_recv_rdma_with_imm_status_use_solicited_recv, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
cmocka_unit_test_setup_teardown(test_ibv_cq_ex_read_recover_forgotten_peer_ah, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
cmocka_unit_test_setup_teardown(test_ibv_cq_ex_read_ignore_removed_peer, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
cmocka_unit_test_setup_teardown(test_rdm_fallback_to_ibv_create_cq_ex_cq_read_ignore_forgotton_peer, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
Expand Down
2 changes: 2 additions & 0 deletions prov/efa/test/efa_unit_tests.h
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,8 @@ void test_rdm_cq_read_bad_send_status_unreachable_receiver();
void test_rdm_cq_read_bad_send_status_invalid_qpn();
void test_rdm_cq_read_bad_send_status_message_too_long();
void test_ibv_cq_ex_read_bad_recv_status();
void test_ibv_cq_ex_read_bad_recv_rdma_with_imm_status_use_unsolicited_recv();
void test_ibv_cq_ex_read_bad_recv_rdma_with_imm_status_use_solicited_recv();
void test_ibv_cq_ex_read_recover_forgotten_peer_ah();
void test_rdm_fallback_to_ibv_create_cq_ex_cq_read_ignore_forgotton_peer();
void test_ibv_cq_ex_read_ignore_removed_peer();
Expand Down

0 comments on commit 4c2c5a6

Please sign in to comment.