Skip to content

Commit

Permalink
fabtests: New fabtest fi_flood to test oversubscription of resources
Browse files Browse the repository at this point in the history
	  1.  MR cache based registrations
	  tests register and send in batch and sequential modes while
	  flooding the cache beyond the maximum size.
	  2. Test receipt of unexpected messages by overwhelming the receiver

Signed-off-by: nikhil nanal <[email protected]>
  • Loading branch information
nikhilnanal authored and aingerson committed Nov 13, 2024
1 parent f61cb8a commit 15278bb
Show file tree
Hide file tree
Showing 12 changed files with 119 additions and 73 deletions.
10 changes: 5 additions & 5 deletions fabtests/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ bin_PROGRAMS = \
functional/fi_rdm_atomic \
functional/fi_rdm_stress \
functional/fi_multi_recv \
functional/fi_bw \
functional/fi_flood \
functional/fi_rdm_multi_client \
functional/fi_loopback \
benchmarks/fi_msg_pingpong \
Expand Down Expand Up @@ -360,9 +360,9 @@ functional_fi_multi_recv_SOURCES = \
functional/multi_recv.c
functional_fi_multi_recv_LDADD = libfabtests.la

functional_fi_bw_SOURCES = \
functional/bw.c
functional_fi_bw_LDADD = libfabtests.la
functional_fi_flood_SOURCES = \
functional/flood.c
functional_fi_flood_LDADD = libfabtests.la

functional_fi_rdm_multi_client_SOURCES = \
functional/rdm_multi_client.c
Expand Down Expand Up @@ -651,7 +651,7 @@ dummy_man_pages = \
man/man1/fi_eq_test.1 \
man/man1/fi_getinfo_test.1 \
man/man1/fi_mr_test.1 \
man/man1/fi_bw.1 \
man/man1/fi_flood.1 \
man/man1/fi_rdm_multi_client.1 \
man/man1/fi_ubertest.1 \
man/man1/fi_efa_ep_rnr_retry.1
Expand Down
4 changes: 2 additions & 2 deletions fabtests/Makefile.win
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ benchmarks: $(outdir)\dgram_pingpong.exe $(outdir)\msg_bw.exe \
$(outdir)\rdm_pingpong.exe $(outdir)\rma_pingpong.exe $(outdir)\rdm_tagged_bw.exe \
$(outdir)\rdm_bw.exe $(outdir)\rdm_tagged_pingpong.exe $(outdir)\rma_bw.exe

functional: $(outdir)\av_xfer.exe $(outdir)\bw.exe $(outdir)\cm_data.exe $(outdir)\cq_data.exe \
functional: $(outdir)\av_xfer.exe $(outdir)\flood.exe $(outdir)\cm_data.exe $(outdir)\cq_data.exe \
$(outdir)\dgram.exe $(outdir)\msg.exe $(outdir)\msg_epoll.exe \
$(outdir)\inject_test.exe $(outdir)\msg_sockets.exe $(outdir)\multi_mr.exe \
$(outdir)\multi_ep.exe $(outdir)\multi_recv.exe $(outdir)\rdm.exe \
Expand Down Expand Up @@ -120,7 +120,7 @@ $(outdir)\rma_bw.exe: {benchmarks}rma_bw.c $(basedeps) {benchmarks}benchmark_sha

$(outdir)\av_xfer.exe: {functional}av_xfer.c $(basedeps)

$(outdir)\bw.exe: {functional}bw.c $(basedeps)
$(outdir)\flood.exe: {functional}flood.c $(basedeps)

$(outdir)\cm_data.exe: {functional}cm_data.c $(basedeps)

Expand Down
2 changes: 1 addition & 1 deletion fabtests/fabtests.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,7 @@
<ClCompile Include="functional\rdm_netdir.c" />
<ClCompile Include="functional\scalable_ep.c" />
<ClCompile Include="functional\inject_test.c" />
<ClCompile Include="functional\bw.c" />
<ClCompile Include="functional\flood.c" />
<ClCompile Include="unit\av_test.c" />
<ClCompile Include="unit\cntr_test.c" />
<ClCompile Include="unit\common.c" />
Expand Down
2 changes: 1 addition & 1 deletion fabtests/fabtests.vcxproj.filters
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@
<ClCompile Include="functional\unexpected_msg.c">
<Filter>Source Files\functional</Filter>
</ClCompile>
<ClCompile Include="functional\bw.c">
<ClCompile Include="functional\flood.c">
<Filter>Source Files\functional</Filter>
</ClCompile>
<ClCompile Include="functional\inject_test.c">
Expand Down
138 changes: 90 additions & 48 deletions fabtests/functional/bw.c → fabtests/functional/flood.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019 Intel Corporation. All rights reserved.
* Copyright (c) Intel Corporation. All rights reserved.
*
* This software is available to you under the BSD license
* below:
Expand Down Expand Up @@ -34,7 +34,7 @@

#include <shared.h>

int sleep_time = 0;
static int sleep_time = 0;

static ssize_t post_one_tx(struct ft_context *msg)
{
Expand Down Expand Up @@ -99,94 +99,136 @@ static int post_rx_sync(void)
return ret;
}

static int run_loop(void)
/*
 * Invoke FT_CLOSE_FID on the mr member of each of the first window_size
 * entries of ctx_arr, walking the array from index 0 upwards.
 *
 * ctx_arr:     array of transfer contexts whose mr members are closed
 * window_size: number of leading entries to process; nothing is done
 *              when it is zero or negative
 */
static void mr_close_all(struct ft_context *ctx_arr, int window_size)
{
	int idx = 0;

	while (idx < window_size) {
		FT_CLOSE_FID(ctx_arr[idx].mr);
		idx++;
	}
}

static int run_seq_mr_send(void) {

int ret;
int i;

mr_close_all(tx_ctx_arr, opts.window_size);
mr_close_all(rx_ctx_arr, opts.window_size);

printf("Sequential memory registration:");
if (opts.dst_addr) {
for (i = 0; i < opts.window_size; i++) {
ret = ft_reg_mr(fi, tx_ctx_arr[i].buf, tx_mr_size,
ft_info_to_mr_access(fi),
FT_TX_MR_KEY + i, opts.iface, opts.device,
&(tx_ctx_arr[i].mr), &(tx_ctx_arr[i].desc));
if (ret)
goto out;

ret = post_one_tx(&tx_ctx_arr[i]);
if (ret)
goto out;

ret = ft_get_tx_comp(tx_seq);
if (ret)
goto out;

FT_CLOSE_FID(tx_ctx_arr[i].mr);
}
} else {
for (i = 0; i < opts.window_size; i++) {
ret = ft_reg_mr(fi, rx_ctx_arr[i].buf, rx_mr_size,
ft_info_to_mr_access(fi), FT_RX_MR_KEY + i, opts.iface, opts.device,
&(rx_ctx_arr[i].mr),
&(rx_ctx_arr[i].desc));
if (ret)
goto out;

ret = ft_post_rx_buf(ep, opts.transfer_size,
&(rx_ctx_arr[i].context),
rx_ctx_arr[i].buf,
rx_ctx_arr[i].desc, ft_tag);
if (ret)
goto out;

ret = wait_check_rx_bufs();
if (ret)
goto out;

FT_CLOSE_FID(rx_ctx_arr[i].mr);
}
}
if (opts.options & FT_OPT_OOB_SYNC)
ret = ft_sync();
else
ret = post_rx_sync();
out:
printf("%s\n", ret ? "Fail" : "Pass");
return ret;
}

static int run_batch_mr_send(void)
{
int ret, i;

/* Receive side delay is used in order to let the sender
get ahead of the receiver and post multiple sends
before the receiver begins processing them. */
* get ahead of the receiver and post multiple sends
* before the receiver begins processing them.
*/
if (!opts.dst_addr)
sleep(sleep_time);

ft_start();
printf("Batch memory registration:");
if (opts.dst_addr) {
for (i = 0; i < opts.window_size; i++) {
ret = post_one_tx(&tx_ctx_arr[i]);
if (ret)
return ret;
goto out;
}

ret = ft_get_tx_comp(tx_seq);
if (ret)
return ret;
goto out;
} else {
for (i = 0; i < opts.window_size; i++) {
ret = ft_post_rx_buf(ep, opts.transfer_size,
&rx_ctx_arr[i].context,
rx_ctx_arr[i].buf,
rx_ctx_arr[i].desc, 0);
if (ret)
return ret;
goto out;
}

ret = wait_check_rx_bufs();
if (ret)
return ret;
goto out;
}
ft_stop();

if (opts.options & FT_OPT_OOB_SYNC)
ret = ft_sync();
else
ret = post_rx_sync();
if (ret)
return ret;

if (opts.machr)
show_perf_mr(opts.transfer_size, opts.window_size, &start, &end, 1,
opts.argc, opts.argv);
else
show_perf(NULL, opts.transfer_size, opts.window_size, &start, &end, 1);

out:
printf("%s\n", ret ? "Fail" : "Pass");
return ret;
}

static int run(void)
{
int ret, i;
int ret;

ret = hints->ep_attr->type == FI_EP_MSG ?
ft_init_fabric_cm() : ft_init_fabric();
if (ret)
return ret;

ret = ft_tx(ep, remote_fi_addr, 1, &tx_ctx);
if (ret)
return ret;

ret = ft_get_tx_comp(tx_seq);
ret = run_batch_mr_send();
if (ret)
return ret;
goto out;

ret = ft_get_rx_comp(rx_seq);
ret = run_seq_mr_send();
if (ret)
return ret;

if (!(opts.options & FT_OPT_SIZE)) {
for (i = 0; i < TEST_CNT; i++) {
if (!ft_use_size(i, opts.sizes_enabled))
continue;
opts.transfer_size = test_size[i].size;
ret = run_loop();
if (ret)
goto out;
}
} else {
ret = run_loop();
if (ret)
goto out;
}
goto out;

out:
return ret;
Expand All @@ -197,6 +239,8 @@ int main(int argc, char **argv)
int op, ret;

opts = INIT_OPTS;
opts.options |= FT_OPT_ALLOC_MULT_MR;
opts.options |= FT_OPT_NO_PRE_POSTED_RX;

hints = fi_allocinfo();
if (!hints)
Expand Down Expand Up @@ -225,7 +269,7 @@ int main(int argc, char **argv)
break;
case '?':
case 'h':
ft_usage(argv[0], "A bandwidth test with data verification.");
ft_usage(argv[0], "test to oversubscribe mr cache and receiver with unexpected msgs.");
FT_PRINT_OPTS_USAGE("-T sleep_time",
"Receive side delay before starting");
FT_PRINT_OPTS_USAGE("-v", "Enable data verification");
Expand All @@ -243,8 +287,6 @@ int main(int argc, char **argv)
hints->domain_attr->mr_mode = opts.mr_mode;
hints->addr_format = opts.address_format;

opts.options |= FT_OPT_ALLOC_MULT_MR;

if (hints->ep_attr->type == FI_EP_DGRAM) {
fprintf(stderr, "This test does not support DGRAM endpoints\n");
return -FI_EINVAL;
Expand All @@ -260,4 +302,4 @@ int main(int argc, char **argv)
ft_free_res();

return ft_exit_code(ret);
}
}
11 changes: 7 additions & 4 deletions fabtests/man/fabtests.7.md
Original file line number Diff line number Diff line change
Expand Up @@ -140,10 +140,13 @@ features of libfabric.
buffer tries to remain the same. This test is used to validate the
correct behavior of memory registration caches.

*fi_bw*
: Performs a one-sided bandwidth test with an option for data verification.
A sleep time on the receiving side can be enabled in order to allow
the sender to get ahead of the receiver.
*fi_flood*
: The test performs a one-sided transfer by utilizing Bulk Memory Region (MR)
registration and flooding the receiver with unexpected messages. This is
followed by sequential MR registration transfers, which force the MR cache
to evict the least recently used MRs before making new transfers. An optional
sleep time can be enabled on the receiving side to allow the sender to get
ahead of the receiver.

*fi_rdm_multi_client*
: Tests a persistent server communicating with multiple clients, one at a
Expand Down
File renamed without changes.
2 changes: 1 addition & 1 deletion fabtests/pytest/default/test_rdm.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def test_rdm_shared_av(cmdline_args):
@pytest.mark.functional
def test_rdm_bw_functional(cmdline_args, completion_semantic):
from common import ClientServerTest
test = ClientServerTest(cmdline_args, "fi_bw -e rdm -v -T 1", completion_semantic=completion_semantic)
test = ClientServerTest(cmdline_args, "fi_flood -e rdm -v -T 1", completion_semantic=completion_semantic)
test.run()

@pytest.mark.parametrize("iteration_type",
Expand Down
2 changes: 1 addition & 1 deletion fabtests/pytest/efa/test_flood_peer.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,6 @@
@pytest.mark.functional
def test_flood_peer(cmdline_args):
from common import ClientServerTest
test = ClientServerTest(cmdline_args, "fi_bw -e rdm -W 6400 -S 512 -T 5",
test = ClientServerTest(cmdline_args, "fi_flood -e rdm -W 6400 -S 512 -T 5",
timeout=300)
test.run()
6 changes: 3 additions & 3 deletions fabtests/scripts/runfabtests.cmd
Original file line number Diff line number Diff line change
Expand Up @@ -74,9 +74,9 @@ set functional_tests=^
"inject_test -N -A inject -v"^
"inject_test -A inj_complete -v"^
"inject_test -N -A inj_complete -v"^
"bw -e rdm -v -T 1"^
"bw -e rdm -v -T 1 -U"^
"bw -e msg -v -T 1"^
"flood -e rdm -v -T 1"^
"flood -e rdm -v -T 1 -U"^
"flood -e msg -v -T 1"^
"rdm_multi_client -C 10 -I 5"^
"rdm_multi_client -C 10 -I 5 -U"

Expand Down
6 changes: 3 additions & 3 deletions fabtests/scripts/runfabtests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -150,9 +150,9 @@ functional_tests=(
"fi_inject_test -N -A inject -v"
"fi_inject_test -A inj_complete -v"
"fi_inject_test -N -A inj_complete -v"
"fi_bw -e rdm -v -T 1"
"fi_bw -e rdm -v -T 1 -U"
"fi_bw -e msg -v -T 1"
"fi_flood -e rdm -v -T 1"
"fi_flood -e rdm -v -T 1 -U"
"fi_flood -e msg -v -T 1"
"fi_rdm_multi_client -C 10 -I 5"
"fi_rdm_multi_client -C 10 -I 5 -U"
)
Expand Down
9 changes: 5 additions & 4 deletions fabtests/test_configs/tcp/io_uring.exclude
Original file line number Diff line number Diff line change
Expand Up @@ -70,13 +70,14 @@ fi_msg_sockets
# fi_unexpected_msg -e rdm fails with no message
fi_unexpected_msg -e rdm

# fi_bw -e msg fails with
# fi_flood -e msg fails with
# fi_eq_sread(): common/shared.c:1165, ret=-4 (Interrupted system call)
fi_bw -e msg
fi_flood -e msg

# fi_bw fails by hanging
# fi_flood fails by hanging
# fi_flood fails by runfabtest timeout only on the CI.
# This is a suspected race condition
fi_bw
fi_flood

# fi_msg_pingpong fails with
# fi_eq_sread(): common/shared.c:1127, ret=-4 (Interrupted system call)
Expand Down

0 comments on commit 15278bb

Please sign in to comment.