Skip to content

Commit

Permalink
prov/verbs: Async route resolution and non-blocking EP creation
Browse files Browse the repository at this point in the history
This patch implements the final changes needed to make EP creation
non-blocking.

The address is now resolved asynchronously when the connection is established,
so the caller is no longer blocked. The connection initiator creates
the QP when the route is resolved. As for the connection target, it
creates the QP when the EP is enabled (same as it was before).

Signed-off-by: Sylvain Didelot <[email protected]>
  • Loading branch information
sydidelot committed Nov 13, 2023
1 parent 59071c0 commit 8ac7c24
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 32 deletions.
12 changes: 8 additions & 4 deletions prov/verbs/src/verbs_cm.c
Original file line number Diff line number Diff line change
Expand Up @@ -190,11 +190,15 @@ vrb_msg_ep_connect(struct fid_ep *ep_fid, const void *addr,

ofi_genlock_lock(&vrb_ep2_progress(ep)->ep_lock);
assert(ep->state == VRB_IDLE);
ep->state = VRB_RESOLVE_ROUTE;
ret = rdma_resolve_route(ep->id, VERBS_RESOLVE_TIMEOUT);
if (ret) {
ep->state = VRB_RESOLVE_ADDR;
if (rdma_resolve_addr(ep->id, ep->info_attr.src_addr,
ep->info_attr.dest_addr, VERBS_RESOLVE_TIMEOUT)) {
ret = -errno;
VRB_WARN_ERRNO(FI_LOG_EP_CTRL, "rdma_resolve_route");
VRB_WARN_ERRNO(FI_LOG_EP_CTRL, "rdma_resolve_addr");
ofi_straddr_log(&vrb_prov, FI_LOG_WARN, FI_LOG_EP_CTRL,
"src addr", ep->info_attr.src_addr);
ofi_straddr_log(&vrb_prov, FI_LOG_WARN, FI_LOG_EP_CTRL,
"dst addr", ep->info_attr.dest_addr);
free(ep->cm_priv_data);
ep->cm_priv_data = NULL;
ep->state = VRB_IDLE;
Expand Down
18 changes: 10 additions & 8 deletions prov/verbs/src/verbs_ep.c
Original file line number Diff line number Diff line change
Expand Up @@ -1033,15 +1033,17 @@ static int vrb_ep_enable(struct fid_ep *ep_fid)
return -FI_EINVAL;
}

ret = rdma_create_qp(ep->id, domain->pd, &attr);
if (ret) {
VRB_WARN_ERRNO(FI_LOG_EP_CTRL, "rdma_create_qp");
return -errno;
}
if (ep->state == VRB_REQ_RCVD) {
ret = rdma_create_qp(ep->id, domain->pd, &attr);
if (ret) {
VRB_WARN_ERRNO(FI_LOG_EP_CTRL, "rdma_create_qp");
return -errno;
}

/* Allow shared XRC INI QP not controlled by RDMA CM
* to share same post functions as RC QP. */
ep->ibv_qp = ep->id->qp;
/* Allow shared XRC INI QP not controlled by RDMA CM
* to share same post functions as RC QP. */
ep->ibv_qp = ep->id->qp;
}
break;
case FI_EP_DGRAM:
assert(domain);
Expand Down
20 changes: 0 additions & 20 deletions prov/verbs/src/verbs_init.c
Original file line number Diff line number Diff line change
Expand Up @@ -338,29 +338,9 @@ int vrb_create_ep(struct vrb_ep *ep, enum rdma_port_space ps,
goto err1;
}

/* TODO convert this call to non-blocking (use event channel) as well:
* This may likely be needed for better scaling when running large
* MPI jobs.
* Making this non-blocking would mean we can't create QP at EP enable
* time. We need to wait for RDMA_CM_EVENT_ADDR_RESOLVED event before
* creating the QP using rdma_create_qp. It would also require a SW
* receive queue to store recvs posted by app after enabling the EP.
*/
if (rdma_resolve_addr(*id, rai->ai_src_addr, rai->ai_dst_addr,
VERBS_RESOLVE_TIMEOUT)) {
ret = -errno;
VRB_WARN_ERRNO(FI_LOG_EP_CTRL, "rdma_resolve_addr");
ofi_straddr_log(&vrb_prov, FI_LOG_WARN, FI_LOG_EP_CTRL,
"src addr", rai->ai_src_addr);
ofi_straddr_log(&vrb_prov, FI_LOG_WARN, FI_LOG_EP_CTRL,
"dst addr", rai->ai_dst_addr);
goto err2;
}
rdma_freeaddrinfo(rai);
return 0;

err2:
rdma_destroy_id(*id);
err1:
rdma_freeaddrinfo(rai);
return ret;
Expand Down

0 comments on commit 8ac7c24

Please sign in to comment.