diff --git a/man/fi_efa.7.md b/man/fi_efa.7.md index 0080f35afe4..a7705098403 100644 --- a/man/fi_efa.7.md +++ b/man/fi_efa.7.md @@ -201,6 +201,12 @@ struct fi_efa_mr_attr { # RUNTIME PARAMETERS +*FI_EFA_IFACE* +: A comma-delimited list of EFA device, i.e. NIC, names that should be visible to + the application. This parameter can be used to include/exclude NICs to enforce + process affinity based on the hardware topology. The default value is "all", which + allows all available NICs to be discovered. + *FI_EFA_TX_SIZE* : Maximum number of transmit operations before the provider returns -FI_EAGAIN. For only the RDM endpoint, this parameter will cause transmit operations to diff --git a/prov/efa/src/efa_env.c b/prov/efa/src/efa_env.c index 84614a06426..484f544ddd6 100644 --- a/prov/efa/src/efa_env.c +++ b/prov/efa/src/efa_env.c @@ -9,6 +9,7 @@ #include "ofi_hmem.h" struct efa_env efa_env = { + .iface = "all", .tx_min_credits = 32, .tx_queue_size = 0, .enable_shm_transfer = 1, @@ -104,6 +105,15 @@ void efa_env_param_get(void) abort(); } + fi_param_get_str(&efa_prov, "iface", &efa_env.iface); + if (strlen(efa_env.iface) < 1) { + fprintf(stderr, + "FI_EFA_IFACE is empty. 
Specify full-qualified names separated by comma, " + "or \"all\" to use all available devices.\n" + "Your application will now abort.\n"); + abort(); + } + fi_param_get_int(&efa_prov, "tx_queue_size", &efa_env.tx_queue_size); fi_param_get_int(&efa_prov, "enable_shm_transfer", &efa_env.enable_shm_transfer); fi_param_get_int(&efa_prov, "use_zcpy_rx", &efa_env.use_zcpy_rx); @@ -155,6 +165,8 @@ void efa_env_param_get(void) void efa_env_define() { efa_env_define_use_device_rdma(); + fi_param_define(&efa_prov, "iface", FI_PARAM_STRING, + "A comma delimited list of case-sensitive names to restrict eligible EFA NICs (Default: all)."); fi_param_define(&efa_prov, "tx_min_credits", FI_PARAM_INT, "Defines the minimum number of credits a sender requests from a receiver (Default: 32)."); fi_param_define(&efa_prov, "tx_queue_size", FI_PARAM_INT, diff --git a/prov/efa/src/efa_env.h b/prov/efa/src/efa_env.h index 48742fe4097..6fdd83a4a21 100644 --- a/prov/efa/src/efa_env.h +++ b/prov/efa/src/efa_env.h @@ -20,6 +20,7 @@ enum efa_env_huge_page_setting }; struct efa_env { + char *iface; int tx_min_credits; int tx_queue_size; int use_zcpy_rx; @@ -80,6 +81,8 @@ struct efa_env { int use_unsolicited_write_recv; }; +extern struct efa_env efa_env; + /** * @brief Return true if the environment variable FI_EFA_USE_DEVICE_RDMA is present * @@ -93,7 +96,38 @@ static inline bool efa_env_has_use_device_rdma() { return (ret != -FI_ENODATA); } -extern struct efa_env efa_env; +/** + * @brief Return true if the NIC is allowed by FI_EFA_IFACE variable + * + * @param[in] name NIC name string + * @return true if the NIC is allowed, otherwise false + */ +static inline bool efa_env_allows_nic(const char *name) { + char *match = efa_env.iface; + char *end = efa_env.iface + strlen(efa_env.iface); + + if (name == NULL || strlen(name) < 1) + return false; + + if (!strncmp("all", efa_env.iface, 3)) + return true; + + while (match < end) { + match = strstr(match, name); + if (!match) + return false; + + if 
((match > efa_env.iface && *(match - 1) != ',') || + ((match + strlen(name)) < end && *(match + strlen(name)) != ',')) { + /* Skip partial match */ + match += strlen(name); + continue; + } + return true; + } + + return false; +} void efa_env_initialize(); diff --git a/prov/efa/src/efa_user_info.c b/prov/efa/src/efa_user_info.c index 94cc97343a3..2fd6752912c 100644 --- a/prov/efa/src/efa_user_info.c +++ b/prov/efa/src/efa_user_info.c @@ -182,6 +182,9 @@ int efa_user_info_get_dgram(uint32_t version, const char *node, const char *serv for (i = 0; i < g_device_cnt; ++i) { prov_info_dgram = g_device_list[i].dgram_info; + if (!efa_env_allows_nic(prov_info_dgram->nic->device_attr->name)) + continue; + ret = efa_prov_info_compare_src_addr(node, flags, hints, prov_info_dgram); if (ret) continue; @@ -533,6 +536,9 @@ int efa_user_info_get_rdm(uint32_t version, const char *node, if (prov_info->ep_attr->type != FI_EP_RDM) continue; + if (!efa_env_allows_nic(prov_info->nic->device_attr->name)) + continue; + ret = efa_prov_info_compare_src_addr(node, flags, hints, prov_info); if (ret) continue; diff --git a/prov/efa/test/efa_unit_test_info.c b/prov/efa/test/efa_unit_test_info.c index 2276148fa66..400f3f286e4 100644 --- a/prov/efa/test/efa_unit_test_info.c +++ b/prov/efa/test/efa_unit_test_info.c @@ -498,6 +498,145 @@ void test_use_device_rdma( const int env_val, return; } +/** + * Get the name of the "first"(random order) NIC + * + * @param[out] name The returned name string. + * It should be free'd after use. 
+ * @returns FI_SUCCESS on success or a non-zero error code + */ +static int get_first_nic_name(char **name) { + int ret; + char *nic_name = NULL; + struct fi_info *hints, *info; + + hints = efa_unit_test_alloc_hints(FI_EP_RDM); + ret = fi_getinfo(FI_VERSION(1, 14), NULL, NULL, 0ULL, hints, &info); + fi_freeinfo(hints); + if (ret) + return ret; + + nic_name = info->nic->device_attr->name; + assert_non_null(nic_name); + assert_int_not_equal(strlen(nic_name), 0); + + *name = malloc(strlen(nic_name) + 1); + if (!name) + return FI_ENOMEM; + + strcpy(*name, nic_name); + + fi_freeinfo(info); + + return FI_SUCCESS; +} + +/** + * Verify the returned NIC from fi_getinfo. + * Ideally we want to test multi-NIC selection logic, but this test is most likely + * run on single-NIC platforms. Therefore we make a compromise and only verify the + * "first" NIC. + * + * @param[in] filter The value that would be set for FI_EFA_IFACE + * @param[in] expect_first_name The expected name of the "first" NIC + */ +static void test_efa_nic_selection(const char *filter, const char *expect_first_name) { + int ret; + struct fi_info *hints, *info; + + efa_env.iface = (char *) filter; + hints = efa_unit_test_alloc_hints(FI_EP_RDM); + ret = fi_getinfo(FI_VERSION(1, 14), NULL, NULL, 0ULL, hints, &info); + fi_freeinfo(hints); + if (expect_first_name) { + assert_int_equal(FI_SUCCESS, ret); + assert_string_equal(expect_first_name, info->nic->device_attr->name); + fi_freeinfo(info); + } else { + assert_int_not_equal(FI_SUCCESS, ret); + } +} + +/** + * Verify NICs are returned if FI_EFA_IFACE=all + */ +void test_efa_nic_select_all_devices_matches() { + int ret; + char *nic_name; + + ret = get_first_nic_name(&nic_name); + assert_int_equal(ret, FI_SUCCESS); + + test_efa_nic_selection("all", nic_name); + + free(nic_name); +} + +/** + * Verify the "first" NIC can be selected by name + */ +void test_efa_nic_select_first_device_matches() { + int ret; + char *nic_name; + + ret = get_first_nic_name(&nic_name); + 
assert_int_equal(ret, FI_SUCCESS); + + test_efa_nic_selection(nic_name, nic_name); + + free(nic_name); +} + +/** + * Verify that surrounding commas are handled correctly, + * i.e. ignored, to match the NIC name. + */ +void test_efa_nic_select_first_device_with_surrounding_comma_matches() { + int ret; + char *nic_name, *filter; + + ret = get_first_nic_name(&nic_name); + assert_int_equal(ret, FI_SUCCESS); + + filter = malloc(strlen(nic_name) + 3); + assert_non_null(filter); + + strcpy(filter, ","); + strcat(filter, nic_name); + strcat(filter, ","); + + test_efa_nic_selection(filter, nic_name); + + free(filter); + free(nic_name); +} + +/** + * Verify that only full NIC names are matched, and prefixes, + * e.g. the first letter, will not accidentally select the wrong NIC. + */ +void test_efa_nic_select_first_device_first_letter_no_match() { + int ret; + char *nic_name, filter[2]; + + ret = get_first_nic_name(&nic_name); + assert_int_equal(ret, FI_SUCCESS); + + filter[0] = nic_name[0]; + filter[1] = '\0'; + + test_efa_nic_selection(filter, NULL); + + free(nic_name); +} + +/** + * Verify that empty NIC names will not select any NIC + */ +void test_efa_nic_select_empty_device_no_match() { + test_efa_nic_selection(",", NULL); +} + /* indicates the test shouldn't set the setopt or environment variable during setup. 
*/ const int VALUE_NOT_SET = -1; diff --git a/prov/efa/test/efa_unit_tests.c b/prov/efa/test/efa_unit_tests.c index f45748dea4e..b2aeb1faf47 100644 --- a/prov/efa/test/efa_unit_tests.c +++ b/prov/efa/test/efa_unit_tests.c @@ -130,6 +130,11 @@ int main(void) cmocka_unit_test_setup_teardown(test_info_check_hmem_cuda_support_on_api_lt_1_18, NULL, NULL), cmocka_unit_test_setup_teardown(test_info_check_hmem_cuda_support_on_api_ge_1_18, NULL, NULL), cmocka_unit_test_setup_teardown(test_info_check_no_hmem_support_when_not_requested, NULL, NULL), + cmocka_unit_test_setup_teardown(test_efa_nic_select_all_devices_matches, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), + cmocka_unit_test_setup_teardown(test_efa_nic_select_first_device_matches, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), + cmocka_unit_test_setup_teardown(test_efa_nic_select_first_device_with_surrounding_comma_matches, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), + cmocka_unit_test_setup_teardown(test_efa_nic_select_first_device_first_letter_no_match, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), + cmocka_unit_test_setup_teardown(test_efa_nic_select_empty_device_no_match, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_efa_use_device_rdma_env1_opt1, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_efa_use_device_rdma_env0_opt0, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_efa_use_device_rdma_env1_opt0, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), diff --git a/prov/efa/test/efa_unit_tests.h b/prov/efa/test/efa_unit_tests.h index a029daae424..30e02771653 100644 --- a/prov/efa/test/efa_unit_tests.h +++ b/prov/efa/test/efa_unit_tests.h @@ -145,6 +145,11 @@ void test_info_check_hmem_cuda_support_on_api_lt_1_18(); void test_info_check_hmem_cuda_support_on_api_ge_1_18(); void 
test_info_check_no_hmem_support_when_not_requested(); void test_efa_hmem_info_update_neuron(); +void test_efa_nic_select_all_devices_matches(); +void test_efa_nic_select_first_device_matches(); +void test_efa_nic_select_first_device_with_surrounding_comma_matches(); +void test_efa_nic_select_first_device_first_letter_no_match(); +void test_efa_nic_select_empty_device_no_match(); void test_efa_use_device_rdma_env1_opt1(); void test_efa_use_device_rdma_env0_opt0(); void test_efa_use_device_rdma_env1_opt0();