Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

prov/efa: Skip rx pkt refill under certain threshold #10594

Merged
merged 1 commit into from
Dec 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions man/fi_efa.7.md
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,11 @@ for details.
: Use device's unsolicited write recv functionality when it's available. (Default: 1).
Setting this environment variable to 0 can disable this feature.

*FI_EFA_INTERNAL_RX_REFILL_THRESHOLD*
: The minimum number of internal rx pkts that must be waiting to be posted before the EFA provider refills the internal rx pkt pool. (Default: 8).
When the number of internal rx pkts to post is lower than this threshold,
the refill is skipped.

# SEE ALSO

[`fabric`(7)](fabric.7.html),
Expand Down
4 changes: 4 additions & 0 deletions prov/efa/src/efa_env.c
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ struct efa_env efa_env = {
.use_sm2 = false,
.huge_page_setting = EFA_ENV_HUGE_PAGE_UNSPEC,
.use_unsolicited_write_recv = 1,
.internal_rx_refill_threshold = 8,
};

/**
Expand Down Expand Up @@ -132,6 +133,7 @@ void efa_env_param_get(void)
&efa_mr_max_cached_size);
fi_param_get_size_t(&efa_prov, "tx_size", &efa_env.tx_size);
fi_param_get_size_t(&efa_prov, "rx_size", &efa_env.rx_size);
fi_param_get_size_t(&efa_prov, "internal_rx_refill_threshold", &efa_env.internal_rx_refill_threshold);
fi_param_get_bool(&efa_prov, "rx_copy_unexp",
&efa_env.rx_copy_unexp);
fi_param_get_bool(&efa_prov, "rx_copy_ooo",
Expand Down Expand Up @@ -232,6 +234,8 @@ void efa_env_define()
"will use huge page unless FI_EFA_FORK_SAFE is set to 1/on/true.");
fi_param_define(&efa_prov, "use_unsolicited_write_recv", FI_PARAM_BOOL,
"Use device's unsolicited write recv functionality when it's available. (Default: true)");
fi_param_define(&efa_prov, "internal_rx_refill_threshold", FI_PARAM_SIZE_T,
"The threshold that EFA provider will refill the internal rx pkt pool. (Default: %zu)", efa_env.internal_rx_refill_threshold);
}


Expand Down
6 changes: 6 additions & 0 deletions prov/efa/src/efa_env.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,12 @@ struct efa_env {
int use_sm2;
enum efa_env_huge_page_setting huge_page_setting;
int use_unsolicited_write_recv;
/**
* The threshold that EFA provider will refill the internal rx pkt pool.
* When the number of internal rx pkts to post is lower than this threshold,
* the refill will be skipped.
*/
size_t internal_rx_refill_threshold;
};

extern struct efa_env efa_env;
Expand Down
2 changes: 2 additions & 0 deletions prov/efa/src/rdm/efa_rdm_ep.h
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,8 @@ struct efa_domain *efa_rdm_ep_domain(struct efa_rdm_ep *ep)

void efa_rdm_ep_post_internal_rx_pkts(struct efa_rdm_ep *ep);

int efa_rdm_ep_bulk_post_internal_rx_pkts(struct efa_rdm_ep *ep);

/**
* @brief return whether this endpoint should write error cq entry for RNR.
*
Expand Down
6 changes: 5 additions & 1 deletion prov/efa/src/rdm/efa_rdm_ep_utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -741,7 +741,11 @@ int efa_rdm_ep_bulk_post_internal_rx_pkts(struct efa_rdm_ep *ep)
{
int i, err;

if (ep->efa_rx_pkts_to_post == 0)
/**
* When efa_env.internal_rx_refill_threshold > efa_rdm_ep_get_rx_pool_size(ep),
* we should always refill when the pool is empty.
*/
if (ep->efa_rx_pkts_to_post < MIN(efa_env.internal_rx_refill_threshold, efa_rdm_ep_get_rx_pool_size(ep)))
return 0;

assert(ep->efa_rx_pkts_to_post + ep->efa_rx_pkts_posted <= ep->efa_max_outstanding_rx_ops);
Expand Down
80 changes: 80 additions & 0 deletions prov/efa/test/efa_unit_test_ep.c
Original file line number Diff line number Diff line change
Expand Up @@ -1219,3 +1219,83 @@ void test_efa_rdm_ep_post_handshake_error_handling_pke_exhaustion(struct efa_res

free(pkt_entry_vec);
}

/**
 * @brief Verify that the internal rx pkt refill honors
 * efa_env.internal_rx_refill_threshold.
 *
 * The test posts the whole rx pool, releases a few pkts (fewer than the
 * effective threshold) and checks that the bulk post is skipped. It then
 * releases more pkts until MIN(threshold, rx_size) pkts are waiting to be
 * posted and checks that the bulk post takes place.
 *
 * @param[in,out] state     cmocka state holding the struct efa_resource to construct
 * @param[in]     threshold value assigned to efa_env.internal_rx_refill_threshold
 * @param[in]     rx_size   rx size requested through hints->rx_attr->size
 *
 * Both threshold and rx_size must be strictly larger than the number of pkts
 * released in the first phase; otherwise the first bulk post would already
 * reach the effective threshold and the "refill is skipped" check cannot hold.
 */
static
void test_efa_rdm_ep_rx_refill_impl(struct efa_resource **state, int threshold, int rx_size)
{
	/* Number of pkts released before the first (skipped) refill attempt */
	const int num_released_first = 4;
	struct efa_resource *resource = *state;
	struct efa_rdm_ep *efa_rdm_ep;
	struct efa_rdm_pke *pkt_entry;
	int i;
	size_t threshold_orig;

	/*
	 * The effective threshold is MIN(threshold, rx_size). It must exceed
	 * num_released_first, hence "<=" rather than "<".
	 */
	if (threshold <= num_released_first || rx_size <= num_released_first) {
		fprintf(stderr, "Too small threshold or rx_size for this test\n");
		fail();
	}

	threshold_orig = efa_env.internal_rx_refill_threshold;

	efa_env.internal_rx_refill_threshold = threshold;

	resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM);
	assert_non_null(resource->hints);
	resource->hints->rx_attr->size = rx_size;
	efa_unit_test_resource_construct_with_hints(resource, FI_EP_RDM, FI_VERSION(1, 14),
						    resource->hints, true, true);

	efa_rdm_ep = container_of(resource->ep, struct efa_rdm_ep, base_ep.util_ep.ep_fid);
	assert_int_equal(efa_rdm_ep_get_rx_pool_size(efa_rdm_ep), rx_size);

	/* Grow the rx pool and post rx pkts */
	efa_rdm_ep_post_internal_rx_pkts(efa_rdm_ep);
	assert_int_equal(efa_rdm_ep->efa_rx_pkts_posted, efa_rdm_ep_get_rx_pool_size(efa_rdm_ep));

	assert_int_equal(efa_rdm_ep->efa_rx_pkts_to_post, 0);
	for (i = 0; i < num_released_first; i++) {
		pkt_entry = ofi_bufpool_get_ibuf(efa_rdm_ep->efa_rx_pkt_pool, i);
		assert_non_null(pkt_entry);
		efa_rdm_pke_release_rx(pkt_entry);
	}
	assert_int_equal(efa_rdm_ep->efa_rx_pkts_to_post, num_released_first);

	efa_rdm_ep_bulk_post_internal_rx_pkts(efa_rdm_ep);

	/**
	 * efa_rx_pkts_to_post < MIN(FI_EFA_INTERNAL_RX_REFILL_THRESHOLD, rx pool size)
	 * pkts should NOT be refilled
	 */
	assert_int_equal(efa_rdm_ep->efa_rx_pkts_to_post, num_released_first);
	assert_int_equal(efa_rdm_ep->efa_rx_pkts_posted, rx_size);

	/* releasing more pkts to reach the threshold or rx_size */
	for (i = num_released_first; i < MIN(rx_size, threshold); i++) {
		pkt_entry = ofi_bufpool_get_ibuf(efa_rdm_ep->efa_rx_pkt_pool, i);
		assert_non_null(pkt_entry);
		efa_rdm_pke_release_rx(pkt_entry);
	}

	/* After the loop, i equals MIN(rx_size, threshold) */
	assert_int_equal(efa_rdm_ep->efa_rx_pkts_to_post, i);

	efa_rdm_ep_bulk_post_internal_rx_pkts(efa_rdm_ep);

	/**
	 * efa_rx_pkts_to_post == MIN(FI_EFA_INTERNAL_RX_REFILL_THRESHOLD, rx pool size)
	 * pkts should be refilled
	 */
	assert_int_equal(efa_rdm_ep->efa_rx_pkts_to_post, 0);
	assert_int_equal(efa_rdm_ep->efa_rx_pkts_posted, rx_size + i);

	/*
	 * recover the original value
	 * NOTE(review): presumably not reached when an assertion above fails,
	 * which would leave the modified threshold in place for later tests --
	 * confirm against the test teardown.
	 */
	efa_env.internal_rx_refill_threshold = threshold_orig;
}

/**
 * @brief Refill test with a threshold (8) below the requested rx size (64),
 * so the threshold is the smaller argument passed to the shared test body.
 *
 * @param[in,out] state cmocka state forwarded to test_efa_rdm_ep_rx_refill_impl()
 */
void test_efa_rdm_ep_rx_refill_threshold_smaller_than_rx_size(struct efa_resource **state)
{
	const int refill_threshold = 8;
	const int requested_rx_size = 64;

	test_efa_rdm_ep_rx_refill_impl(state, refill_threshold, requested_rx_size);
}

/**
 * @brief Refill test with a threshold (128) above the requested rx size (64),
 * so the rx size is the smaller argument passed to the shared test body.
 *
 * @param[in,out] state cmocka state forwarded to test_efa_rdm_ep_rx_refill_impl()
 */
void test_efa_rdm_ep_rx_refill_threshold_larger_than_rx_size(struct efa_resource **state)
{
	const int refill_threshold = 128;
	const int requested_rx_size = 64;

	test_efa_rdm_ep_rx_refill_impl(state, refill_threshold, requested_rx_size);
}
2 changes: 2 additions & 0 deletions prov/efa/test/efa_unit_tests.c
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,8 @@ int main(void)
cmocka_unit_test_setup_teardown(test_efa_rdm_ep_zcpy_recv_cancel, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
cmocka_unit_test_setup_teardown(test_efa_rdm_ep_zcpy_recv_eagain, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
cmocka_unit_test_setup_teardown(test_efa_rdm_ep_post_handshake_error_handling_pke_exhaustion, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
cmocka_unit_test_setup_teardown(test_efa_rdm_ep_rx_refill_threshold_smaller_than_rx_size, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
cmocka_unit_test_setup_teardown(test_efa_rdm_ep_rx_refill_threshold_larger_than_rx_size, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
cmocka_unit_test_setup_teardown(test_dgram_cq_read_empty_cq, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
cmocka_unit_test_setup_teardown(test_ibv_cq_ex_read_empty_cq, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
cmocka_unit_test_setup_teardown(test_ibv_cq_ex_read_failed_poll, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
Expand Down
2 changes: 2 additions & 0 deletions prov/efa/test/efa_unit_tests.h
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,8 @@ void test_efa_rdm_ep_close_discard_posted_recv();
void test_efa_rdm_ep_zcpy_recv_cancel();
void test_efa_rdm_ep_zcpy_recv_eagain();
void test_efa_rdm_ep_post_handshake_error_handling_pke_exhaustion();
void test_efa_rdm_ep_rx_refill_threshold_smaller_than_rx_size();
void test_efa_rdm_ep_rx_refill_threshold_larger_than_rx_size();
void test_dgram_cq_read_empty_cq();
void test_ibv_cq_ex_read_empty_cq();
void test_ibv_cq_ex_read_failed_poll();
Expand Down
Loading