Skip to content

Commit

Permalink
prov/efa: Avoid checking p2p support when p2p is disabled
Browse files Browse the repository at this point in the history
We do not need to check p2p support when application disables p2p
with FI_HMEM_DISABLE_P2P.

Signed-off-by: Jessie Yang <[email protected]>
  • Loading branch information
jiaxiyan committed Aug 5, 2024
1 parent e3c91ae commit 99350ce
Showing 1 changed file with 37 additions and 19 deletions.
56 changes: 37 additions & 19 deletions prov/efa/src/efa_hmem.c
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ static int efa_domain_hmem_info_init_protocol_thresholds(struct efa_domain *efa_
struct efa_hmem_info *info = &efa_domain->hmem_info[iface];

/* Fall back to FI_HMEM_SYSTEM initialization logic when p2p is unavailable */
if (!info->p2p_supported_by_device)
if (ofi_hmem_p2p_disabled() || !info->p2p_supported_by_device)
iface = FI_HMEM_SYSTEM;

switch (iface) {
Expand Down Expand Up @@ -123,16 +123,7 @@ static int efa_domain_hmem_info_init_cuda(struct efa_domain *efa_domain)
return 0;
}

cuda_ret = ofi_cudaMalloc(&ptr, len);
if (cuda_ret != cudaSuccess) {
EFA_WARN(FI_LOG_DOMAIN,
"Failed to allocate CUDA buffer: %s\n",
ofi_cudaGetErrorString(cuda_ret));
return 0;
}

info->initialized = true;
info->p2p_disabled_by_user = false;

/* If user is using libfabric API 1.18 or later, by default EFA provider is permitted to
* use CUDA library to support CUDA memory, therefore p2p is not required.
Expand All @@ -142,6 +133,23 @@ static int efa_domain_hmem_info_init_cuda(struct efa_domain *efa_domain)
else
info->p2p_required_by_impl = true;

info->p2p_disabled_by_user = ofi_hmem_p2p_disabled();

if (info->p2p_disabled_by_user) {
/* Don't need to check p2p support */
efa_domain_hmem_info_init_protocol_thresholds(efa_domain, FI_HMEM_CUDA);
return 0;
}

cuda_ret = ofi_cudaMalloc(&ptr, len);
if (cuda_ret != cudaSuccess) {
info->initialized = false;
EFA_WARN(FI_LOG_DOMAIN,
"Failed to allocate CUDA buffer: %s\n",
ofi_cudaGetErrorString(cuda_ret));
return 0;
}

#if HAVE_EFA_DMABUF_MR
ret = cuda_get_dmabuf_fd(ptr, len, &dmabuf_fd, &dmabuf_offset);
if (ret == FI_SUCCESS) {
Expand Down Expand Up @@ -228,6 +236,17 @@ static int efa_domain_hmem_info_init_neuron(struct efa_domain *efa_domain)
return 0;
}

info->initialized = true;
/* Neuron currently requires P2P */
info->p2p_required_by_impl = true;
info->p2p_disabled_by_user = ofi_hmem_p2p_disabled();

if (info->p2p_disabled_by_user) {
/* Don't need to check p2p support */
efa_domain_hmem_info_init_protocol_thresholds(efa_domain, FI_HMEM_NEURON);
return 0;
}

ptr = neuron_alloc(&handle, len);
/*
* neuron_alloc will fail if application did not call nrt_init,
Expand All @@ -236,14 +255,10 @@ static int efa_domain_hmem_info_init_neuron(struct efa_domain *efa_domain)
*/
if (!ptr) {
EFA_INFO(FI_LOG_DOMAIN, "Cannot allocate Neuron buffer\n");
info->initialized = false;
return 0;
}

info->initialized = true;
info->p2p_disabled_by_user = false;
/* Neuron currently requires P2P */
info->p2p_required_by_impl = true;

#if HAVE_EFA_DMABUF_MR
ret = neuron_get_dmabuf_fd(ptr, (uint64_t)len, &dmabuf_fd, &offset);
if (ret == FI_SUCCESS) {
Expand Down Expand Up @@ -317,7 +332,7 @@ static int efa_domain_hmem_info_init_synapseai(struct efa_domain *efa_domain)
}

info->initialized = true;
info->p2p_disabled_by_user = false;
info->p2p_disabled_by_user = ofi_hmem_p2p_disabled();
/* SynapseAI currently requires P2P */
info->p2p_required_by_impl = true;
info->p2p_supported_by_device = true;
Expand Down Expand Up @@ -365,7 +380,7 @@ int efa_domain_hmem_validate_p2p_opt(struct efa_domain *efa_domain, enum fi_hmem

switch (p2p_opt) {
case FI_HMEM_P2P_REQUIRED:
if (!info->p2p_supported_by_device)
if (OFI_UNLIKELY(ofi_hmem_p2p_disabled()) || !info->p2p_supported_by_device)
return -FI_EOPNOTSUPP;

info->p2p_disabled_by_user = false;
Expand All @@ -377,11 +392,14 @@ int efa_domain_hmem_validate_p2p_opt(struct efa_domain *efa_domain, enum fi_hmem
* PREFERED means a provider should prefer P2P if it is available.
*
* These options does not require that p2p is supported by device,
* nor do they prohibit that p2p is reqruied by implementation. Therefore
* they are always supported.
* nor do they prohibit that p2p is required by implementation.
* Therefore FI_HMEM_P2P_PREFERRED is always supported.
* FI_HMEM_P2P_ENABLED is supported unless p2p is disabled.
*/
case FI_HMEM_P2P_PREFERRED:
case FI_HMEM_P2P_ENABLED:
if (OFI_UNLIKELY(ofi_hmem_p2p_disabled()))
return -FI_EOPNOTSUPP;
info->p2p_disabled_by_user = false;
return 0;

Expand Down

0 comments on commit 99350ce

Please sign in to comment.