diff --git a/prov/efa/src/efa_hmem.c b/prov/efa/src/efa_hmem.c index c6879944227..c2474a0378c 100644 --- a/prov/efa/src/efa_hmem.c +++ b/prov/efa/src/efa_hmem.c @@ -33,7 +33,7 @@ static int efa_domain_hmem_info_init_protocol_thresholds(struct efa_domain *efa_ struct efa_hmem_info *info = &efa_domain->hmem_info[iface]; /* Fall back to FI_HMEM_SYSTEM initialization logic when p2p is unavailable */ - if (!info->p2p_supported_by_device) + if (ofi_hmem_p2p_disabled() || !info->p2p_supported_by_device) iface = FI_HMEM_SYSTEM; switch (iface) { @@ -123,16 +123,7 @@ static int efa_domain_hmem_info_init_cuda(struct efa_domain *efa_domain) return 0; } - cuda_ret = ofi_cudaMalloc(&ptr, len); - if (cuda_ret != cudaSuccess) { - EFA_WARN(FI_LOG_DOMAIN, - "Failed to allocate CUDA buffer: %s\n", - ofi_cudaGetErrorString(cuda_ret)); - return 0; - } - info->initialized = true; - info->p2p_disabled_by_user = false; /* If user is using libfabric API 1.18 or later, by default EFA provider is permitted to * use CUDA library to support CUDA memory, therefore p2p is not required. @@ -142,6 +133,23 @@ static int efa_domain_hmem_info_init_cuda(struct efa_domain *efa_domain) else info->p2p_required_by_impl = true; + info->p2p_disabled_by_user = ofi_hmem_p2p_disabled(); + + if (info->p2p_disabled_by_user) { + /* Don't need to check p2p support */ + efa_domain_hmem_info_init_protocol_thresholds(efa_domain, FI_HMEM_CUDA); + return 0; + } + + cuda_ret = ofi_cudaMalloc(&ptr, len); + if (cuda_ret != cudaSuccess) { + info->initialized = false; + EFA_WARN(FI_LOG_DOMAIN, + "Failed to allocate CUDA buffer: %s\n", + ofi_cudaGetErrorString(cuda_ret)); + return 0; + } + #if HAVE_EFA_DMABUF_MR ret = cuda_get_dmabuf_fd(ptr, len, &dmabuf_fd, &dmabuf_offset); if (ret == FI_SUCCESS) { @@ -228,6 +236,17 @@ static int efa_domain_hmem_info_init_neuron(struct efa_domain *efa_domain) return 0; } + info->initialized = true; + /* Neuron currently requires P2P */ + info->p2p_required_by_impl = true; + info->p2p_disabled_by_user = ofi_hmem_p2p_disabled(); + + if (info->p2p_disabled_by_user) { + /* Don't need to check p2p support */ + efa_domain_hmem_info_init_protocol_thresholds(efa_domain, FI_HMEM_NEURON); + return 0; + } + ptr = neuron_alloc(&handle, len); /* * neuron_alloc will fail if application did not call nrt_init, @@ -236,14 +255,10 @@ static int efa_domain_hmem_info_init_neuron(struct efa_domain *efa_domain) */ if (!ptr) { EFA_INFO(FI_LOG_DOMAIN, "Cannot allocate Neuron buffer\n"); + info->initialized = false; return 0; } - info->initialized = true; - info->p2p_disabled_by_user = false; - /* Neuron currently requires P2P */ - info->p2p_required_by_impl = true; - #if HAVE_EFA_DMABUF_MR ret = neuron_get_dmabuf_fd(ptr, (uint64_t)len, &dmabuf_fd, &offset); if (ret == FI_SUCCESS) { @@ -317,7 +332,7 @@ static int efa_domain_hmem_info_init_synapseai(struct efa_domain *efa_domain) } info->initialized = true; - info->p2p_disabled_by_user = false; + info->p2p_disabled_by_user = ofi_hmem_p2p_disabled(); /* SynapseAI currently requires P2P */ info->p2p_required_by_impl = true; info->p2p_supported_by_device = true; @@ -365,7 +380,7 @@ int efa_domain_hmem_validate_p2p_opt(struct efa_domain *efa_domain, enum fi_hmem switch (p2p_opt) { case FI_HMEM_P2P_REQUIRED: - if (!info->p2p_supported_by_device) + if (OFI_UNLIKELY(ofi_hmem_p2p_disabled()) || !info->p2p_supported_by_device) return -FI_EOPNOTSUPP; info->p2p_disabled_by_user = false; @@ -377,11 +392,14 @@ int efa_domain_hmem_validate_p2p_opt(struct efa_domain *efa_domain, enum fi_hmem * PREFERED means a provider should prefer P2P if it is available. * * These options does not require that p2p is supported by device, - * nor do they prohibit that p2p is reqruied by implementation. Therefore - * they are always supported. + * nor do they prohibit that p2p is required by implementation. + * Therefore FI_HMEM_P2P_PREFERRED is always supported. + * FI_HMEM_P2P_ENABLED is supported unless p2p is disabled. */ case FI_HMEM_P2P_PREFERRED: case FI_HMEM_P2P_ENABLED: + if (OFI_UNLIKELY(ofi_hmem_p2p_disabled())) + return -FI_EOPNOTSUPP; info->p2p_disabled_by_user = false; return 0;