From db3600c7954a80bf40a893a5d549b0a2f386b551 Mon Sep 17 00:00:00 2001
From: Sai Sunku
Date: Tue, 14 Nov 2023 22:16:21 +0000
Subject: [PATCH] prov/efa: Add pingpong test after exhausting MRs

Add a pingpong test that exhausts MRs on both client and server
This test first exhausts MRs on the server and runs a pingpong test
It then exhausts MRs on the client and runs another pingpong test

A pytest hook is used to run this test after all other tests
to prevent MR exhaustion from affecting other tests

Signed-off-by: Sai Sunku
(cherry picked from commit a715487fb64203d3f4ccec5627ec32f6b22d13b8)
---
Reviewer notes (this section is not part of the commit message):

 * This copy of the patch had lost its line breaks and every piece of text
   enclosed in angle brackets. Line structure follows the '+' markers and
   the hunk counts; leading whitespace is reconstructed, so context lines
   may need `git apply --ignore-whitespace`. Author and sign-off e-mail
   addresses are still missing and must be restored before `git am`.
 * The four system #include targets were missing. stdio.h, stdlib.h and
   getopt.h follow from the use of printf, EXIT_FAILURE and getopt_long;
   rdma/fi_errno.h is assumed from fabtests convention. TODO: confirm
   against upstream commit a715487fb642.
 * The mis-encoded argument in the ft_efa_register_mr_reg() call is restored
   to `&registered`.
 * Deviations from the upstream commit (hunk sizes are unchanged, the
   `index` blob ids are left as upstream):
     - `registered` is initialised to 0 so that the count handed to
       ft_efa_deregister_mr_reg() is defined even if registration returns
       before writing it.
     - The pytest hook reorders `items` in place. The upstream version was
       a hookwrapper that yielded the reordered list; a value yielded by a
       hookwrapper is discarded, so the MR exhaustion test was never moved
       to the end of the run.

 fabtests/prov/efa/Makefile.include            |  15 +++
 .../efa/src/efa_exhaust_mr_reg_rdm_pingpong.c | 126 ++++++++++++++++++
 fabtests/pytest/efa/conftest.py               |  13 ++
 fabtests/pytest/efa/test_rdm.py               |   6 +
 4 files changed, 160 insertions(+)
 create mode 100644 fabtests/prov/efa/src/efa_exhaust_mr_reg_rdm_pingpong.c

diff --git a/fabtests/prov/efa/Makefile.include b/fabtests/prov/efa/Makefile.include
index 1db41188cf9..895885e9e54 100644
--- a/fabtests/prov/efa/Makefile.include
+++ b/fabtests/prov/efa/Makefile.include
@@ -33,6 +33,9 @@ bin_PROGRAMS += prov/efa/src/fi_efa_rnr_read_cq_error \
 		prov/efa/src/fi_efa_rnr_queue_resend \
 		prov/efa/src/fi_efa_info_test
 
+if HAVE_VERBS_DEVEL
+bin_PROGRAMS += prov/efa/src/fi_efa_exhaust_mr_reg_rdm_pingpong
+endif HAVE_VERBS_DEVEL
 
 efa_rnr_srcs = \
 	prov/efa/src/efa_rnr_shared.h \
@@ -51,3 +54,15 @@ prov_efa_src_fi_efa_rnr_queue_resend_LDADD = libfabtests.la
 prov_efa_src_fi_efa_info_test_SOURCES = \
 	prov/efa/src/efa_info_test.c
 prov_efa_src_fi_efa_info_test_LDADD = libfabtests.la
+
+if HAVE_VERBS_DEVEL
+efa_exhaust_mr_reg_srcs = \
+	prov/efa/src/efa_exhaust_mr_reg_common.h \
+	prov/efa/src/efa_exhaust_mr_reg_common.c
+
+prov_efa_src_fi_efa_exhaust_mr_reg_rdm_pingpong_SOURCES = \
+	prov/efa/src/efa_exhaust_mr_reg_rdm_pingpong.c \
+	$(efa_exhaust_mr_reg_srcs) \
+	$(benchmarks_srcs)
+prov_efa_src_fi_efa_exhaust_mr_reg_rdm_pingpong_LDADD = libfabtests.la
+endif HAVE_VERBS_DEVEL
diff --git a/fabtests/prov/efa/src/efa_exhaust_mr_reg_rdm_pingpong.c b/fabtests/prov/efa/src/efa_exhaust_mr_reg_rdm_pingpong.c
new file mode 100644
index 00000000000..caa0ee89828
--- /dev/null
+++ b/fabtests/prov/efa/src/efa_exhaust_mr_reg_rdm_pingpong.c
@@ -0,0 +1,126 @@
+/* Copyright Amazon.com, Inc. or its affiliates. All rights reserved. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <getopt.h>
+
+#include <rdma/fi_errno.h>
+
+#include "shared.h"
+#include "benchmarks/benchmark_shared.h"
+#include "efa_exhaust_mr_reg_common.h"
+
+static int run(void)
+{
+	int i, ret = 0;
+
+	if (!(opts.options & FT_OPT_SIZE)) {
+		for (i = 0; i < TEST_CNT; i++) {
+			if (!ft_use_size(i, opts.sizes_enabled))
+				continue;
+			opts.transfer_size = test_size[i].size;
+			init_test(&opts, test_name, sizeof(test_name));
+			ret = pingpong();
+			if (ret)
+				return ret;
+		}
+	} else {
+		init_test(&opts, test_name, sizeof(test_name));
+		ret = pingpong();
+		if (ret)
+			return ret;
+	}
+
+	return ft_finalize();
+}
+
+int main(int argc, char **argv)
+{
+	int op, ret, err;
+	size_t registered = 0;
+	void *buffers[EFA_MR_REG_LIMIT];
+	struct ibv_mr *mr_reg_vec[EFA_MR_REG_LIMIT];
+	struct ibv_pd *pd;
+
+	opts = INIT_OPTS;
+	opts.options |= FT_OPT_SKIP_REG_MR;
+	opts.mr_mode &= ~FI_MR_LOCAL;
+
+	hints = fi_allocinfo();
+	if (!hints)
+		return EXIT_FAILURE;
+
+	while ((op = getopt_long(argc, argv, "Uh" CS_OPTS INFO_OPTS BENCHMARK_OPTS,
+				 long_opts, &lopt_idx)) != -1) {
+		switch (op) {
+		default:
+			if (!ft_parse_long_opts(op, optarg))
+				continue;
+			ft_parse_benchmark_opts(op, optarg);
+			ft_parseinfo(op, optarg, hints, &opts);
+			ft_parsecsopts(op, optarg, &opts);
+			break;
+		case 'U':
+			hints->tx_attr->op_flags |= FI_DELIVERY_COMPLETE;
+			break;
+		case '?':
+		case 'h':
+			ft_csusage(argv[0], "Ping pong client and server using RDM after exhausting MR limits on the EFA device.");
+			ft_benchmark_usage();
+			ft_longopts_usage();
+			return EXIT_FAILURE;
+		}
+	}
+
+	if (optind < argc)
+		opts.dst_addr = argv[optind];
+
+	hints->ep_attr->type = FI_EP_RDM;
+	hints->caps = FI_MSG;
+	hints->mode |= FI_CONTEXT;
+	hints->domain_attr->mr_mode = opts.mr_mode;
+	hints->domain_attr->threading = FI_THREAD_DOMAIN;
+	hints->addr_format = opts.address_format;
+
+	ret = ft_init_fabric();
+	if (ret)
+		return ft_exit_code(ret);
+
+	/* Run progress engine to grow bounce buffers before exhausting MRs */
+	ft_force_progress();
+
+	ft_sync();
+	if (opts.dst_addr) {
+		err = ft_efa_alloc_bufs(buffers, EFA_MR_REG_BUF_SIZE,
+					EFA_MR_REG_LIMIT);
+		if (err)
+			FT_PRINTERR("alloc bufs", -err);
+
+		err = ft_efa_setup_ibv_pd(&pd);
+		if (err)
+			FT_PRINTERR("ibv protection domain", -err);
+
+		printf("Exhausting MRs on client\n");
+		err = ft_efa_register_mr_reg(pd, buffers, EFA_MR_REG_BUF_SIZE,
+					     mr_reg_vec, EFA_MR_REG_LIMIT,
+					     &registered);
+		if (err)
+			FT_PRINTERR("ibv mr reg", -err);
+	}
+
+	ft_sync();
+	printf("Running pingpong test\n");
+	ret = run();
+
+	if (opts.dst_addr) {
+		printf("Deregistering MRs on client\n");
+		err = ft_efa_deregister_mr_reg(mr_reg_vec, registered);
+		if (err)
+			FT_PRINTERR("ibv mr dereg", -err);
+		ft_efa_destroy_ibv_pd(pd);
+	}
+
+	ft_free_res();
+
+	return ft_exit_code(ret);
+}
diff --git a/fabtests/pytest/efa/conftest.py b/fabtests/pytest/efa/conftest.py
index 5c4bc388e6e..717467a3b55 100644
--- a/fabtests/pytest/efa/conftest.py
+++ b/fabtests/pytest/efa/conftest.py
@@ -25,3 +25,16 @@ def message_size(request):
                                         "r:8000,4,9000",])
 def inject_message_size(request):
     return request.param
+
+@pytest.hookimpl(trylast=True)
+def pytest_collection_modifyitems(session, config, items):
+    # Called after collection has been performed, may filter or re-order the items in-place
+    # We use this hook to always run the MR exhaustion test at the end
+    mr_exhaustion_tests, other_tests = [], []
+    for item in items:
+        if "mr_exhaustion" in item.name:
+            mr_exhaustion_tests.append(item)
+        else:
+            other_tests.append(item)
+
+    items[:] = other_tests + mr_exhaustion_tests
diff --git a/fabtests/pytest/efa/test_rdm.py b/fabtests/pytest/efa/test_rdm.py
index b70cd1d3f45..47dc75fc760 100644
--- a/fabtests/pytest/efa/test_rdm.py
+++ b/fabtests/pytest/efa/test_rdm.py
@@ -11,6 +11,12 @@ def test_rdm_pingpong(cmdline_args, iteration_type, completion_semantic, memory_
     efa_run_client_server_test(cmdline_args, "fi_rdm_pingpong", iteration_type,
                                completion_semantic, memory_type, "all", completion_type=completion_type)
 
+@pytest.mark.functional
+@pytest.mark.serial
+def test_mr_exhaustion_rdm_pingpong(cmdline_args):
+    efa_run_client_server_test(cmdline_args, "fi_efa_exhaust_mr_reg_rdm_pingpong", "short",
+                               "transmit_complete", "host_to_host", "all", timeout=1000)
+
 @pytest.mark.functional
 def test_rdm_pingpong_range(cmdline_args, completion_semantic, memory_type, message_size):
     efa_run_client_server_test(cmdline_args, "fi_rdm_pingpong", "short",