Skip to content

Commit

Permalink
prov/efa: re-org rma write to avoid duplicate checks
Browse files Browse the repository at this point in the history
The RMA write entry points previously all funneled down into
efa_rdm_rma_writemsg. As a result, each level of the call chain would
check RMA capabilities, attempt to sync memops, look up the peer, and
check whether SHM could be used. This patch adds a generic helper,
efa_rdm_generic_writemsg, so that these checks happen only once on the
write path.

Signed-off-by: Seth Zegelstein <[email protected]>
  • Loading branch information
a-szegel committed Dec 21, 2023
1 parent 2552f7f commit 1ead949
Showing 1 changed file with 46 additions and 38 deletions.
84 changes: 46 additions & 38 deletions prov/efa/src/rdm/efa_rdm_rma.c
Original file line number Diff line number Diff line change
Expand Up @@ -426,27 +426,59 @@ ssize_t efa_rdm_rma_post_write(struct efa_rdm_ep *ep, struct efa_rdm_ope *txe)
return efa_rdm_ope_post_send(txe, ctrl_type);
}

ssize_t efa_rdm_rma_writemsg(struct fid_ep *ep,
/**
 * @brief Allocate a TX entry for an RMA write and post it.
 *
 * Shared tail of the RMA write entry points. The endpoint and the peer are
 * supplied by the caller, so this function performs no endpoint/peer lookup
 * of its own; the callers visible in this file resolve them (and handle the
 * capability check and the SHM path) before calling in.
 *
 * The TX entry allocation and the post are done while holding the lock of the
 * endpoint's peer SRX context.
 *
 * @param[in] efa_rdm_ep  endpoint the write is issued on
 * @param[in] peer        destination peer; dereferenced unconditionally, so it
 *                        must not be NULL
 * @param[in] msg         RMA message describing the write
 * @param[in] flags       operation flags, forwarded to efa_rdm_rma_alloc_txe()
 *
 * @return -FI_EAGAIN if the peer is in backoff or no TX entry could be
 *         allocated; otherwise the value returned by efa_rdm_rma_post_write()
 *         (non-zero is treated as failure and the TX entry is released).
 */
static inline ssize_t efa_rdm_generic_writemsg(struct efa_rdm_ep *efa_rdm_ep,
					       struct efa_rdm_peer *peer,
					       const struct fi_msg_rma *msg,
					       uint64_t flags)
{
	ssize_t err;
	struct efa_rdm_ope *txe;
	struct util_srx_ctx *srx_ctx;

	efa_perfset_start(efa_rdm_ep, perf_efa_tx);

	EFA_DBG(FI_LOG_EP_DATA,
		"write iov_len %lu flags: %lx\n",
		ofi_total_iov_len(msg->msg_iov, msg->iov_count),
		flags);

	if (peer->flags & EFA_RDM_PEER_IN_BACKOFF) {
		/*
		 * The lock has not been taken yet, so skip the unlock, but
		 * still close the perf interval opened above.
		 */
		err = -FI_EAGAIN;
		goto out_perf;
	}

	srx_ctx = efa_rdm_ep_get_peer_srx_ctx(efa_rdm_ep);
	ofi_genlock_lock(srx_ctx->lock);

	txe = efa_rdm_rma_alloc_txe(efa_rdm_ep, peer, msg, ofi_op_write, flags);
	if (OFI_UNLIKELY(!txe)) {
		/*
		 * Drive progress before reporting EAGAIN (presumably so that a
		 * retry has a chance of finding a free TX entry).
		 */
		efa_rdm_ep_progress_internal(efa_rdm_ep);
		err = -FI_EAGAIN;
		goto out_unlock;
	}

	err = efa_rdm_rma_post_write(efa_rdm_ep, txe);
	if (OFI_UNLIKELY(err)) {
		/* Post failed: the TX entry was never handed off, so release it here. */
		efa_rdm_ep_progress_internal(efa_rdm_ep);
		efa_rdm_txe_release(txe);
	}

out_unlock:
	ofi_genlock_unlock(srx_ctx->lock);
out_perf:
	efa_perfset_end(efa_rdm_ep, perf_efa_tx);
	return err;
}

ssize_t efa_rdm_rma_writemsg(struct fid_ep *ep,
const struct fi_msg_rma *msg,
uint64_t flags)
{
struct efa_rdm_peer *peer;
struct efa_rdm_ep *efa_rdm_ep;
fi_addr_t tmp_addr;
struct fi_msg_rma *msg_clone;
void *shm_desc[EFA_RDM_IOV_LIMIT];
void **tmp_desc;
int err;

efa_rdm_ep = container_of(ep, struct efa_rdm_ep, base_ep.util_ep.ep_fid.fid);
err = efa_rdm_ep_cap_check_rma(efa_rdm_ep);
if (err)
return err;
Expand All @@ -457,17 +489,9 @@ ssize_t efa_rdm_rma_writemsg(struct fid_ep *ep,

assert(msg->iov_count <= efa_rdm_ep->tx_iov_limit);

efa_perfset_start(efa_rdm_ep, perf_efa_tx);
ofi_genlock_lock(srx_ctx->lock);

peer = efa_rdm_ep_get_peer(efa_rdm_ep, msg->addr);
assert(peer);

if (peer->flags & EFA_RDM_PEER_IN_BACKOFF) {
err = -FI_EAGAIN;
goto out;
}

if (peer->is_local && efa_rdm_ep->shm_ep) {
tmp_addr = msg->addr;
tmp_desc = msg->desc;
Expand All @@ -482,25 +506,10 @@ ssize_t efa_rdm_rma_writemsg(struct fid_ep *ep,
err = fi_writemsg(efa_rdm_ep->shm_ep, msg, flags);
msg_clone->addr = tmp_addr;
msg_clone->desc = tmp_desc;
goto out;
}

txe = efa_rdm_rma_alloc_txe(efa_rdm_ep, peer, msg, ofi_op_write, flags);
if (OFI_UNLIKELY(!txe)) {
efa_rdm_ep_progress_internal(efa_rdm_ep);
err = -FI_EAGAIN;
goto out;
return err;
}

err = efa_rdm_rma_post_write(efa_rdm_ep, txe);
if (OFI_UNLIKELY(err)) {
efa_rdm_ep_progress_internal(efa_rdm_ep);
efa_rdm_txe_release(txe);
}
out:
ofi_genlock_unlock(srx_ctx->lock);
efa_perfset_end(efa_rdm_ep, perf_efa_tx);
return err;
return efa_rdm_generic_writemsg(efa_rdm_ep, peer, msg, flags);
}

ssize_t efa_rdm_rma_writev(struct fid_ep *ep, const struct iovec *iov, void **desc,
Expand All @@ -509,7 +518,6 @@ ssize_t efa_rdm_rma_writev(struct fid_ep *ep, const struct iovec *iov, void **de
{
struct fi_rma_iov rma_iov;
struct fi_msg_rma msg;

struct efa_rdm_peer *peer;
struct efa_rdm_ep *efa_rdm_ep;
void *shm_desc[EFA_RDM_IOV_LIMIT] = {NULL};
Expand Down Expand Up @@ -545,7 +553,7 @@ ssize_t efa_rdm_rma_writev(struct fid_ep *ep, const struct iovec *iov, void **de
msg.rma_iov = &rma_iov;
msg.rma_iov_count = 1;

return efa_rdm_rma_writemsg(ep, &msg, 0);
return efa_rdm_generic_writemsg(efa_rdm_ep, peer, &msg, 0);
}

ssize_t efa_rdm_rma_write(struct fid_ep *ep, const void *buf, size_t len, void *desc,
Expand Down Expand Up @@ -625,7 +633,7 @@ ssize_t efa_rdm_rma_writedata(struct fid_ep *ep, const void *buf, size_t len,
msg.rma_iov_count = 1;
msg.data = data;

return efa_rdm_rma_writemsg(ep, &msg, FI_REMOTE_CQ_DATA);
return efa_rdm_generic_writemsg(efa_rdm_ep, peer, &msg, FI_REMOTE_CQ_DATA);
}

ssize_t efa_rdm_rma_inject_write(struct fid_ep *ep, const void *buf, size_t len,
Expand Down Expand Up @@ -660,7 +668,7 @@ ssize_t efa_rdm_rma_inject_write(struct fid_ep *ep, const void *buf, size_t len,
msg.rma_iov_count = 1;
msg.addr = dest_addr;

return efa_rdm_rma_writemsg(ep, &msg, FI_INJECT | EFA_RDM_TXE_NO_COMPLETION);
return efa_rdm_generic_writemsg(efa_rdm_ep, peer, &msg, FI_INJECT | EFA_RDM_TXE_NO_COMPLETION);
}

ssize_t efa_rdm_rma_inject_writedata(struct fid_ep *ep, const void *buf, size_t len,
Expand Down Expand Up @@ -697,7 +705,7 @@ ssize_t efa_rdm_rma_inject_writedata(struct fid_ep *ep, const void *buf, size_t
msg.addr = dest_addr;
msg.data = data;

return efa_rdm_rma_writemsg(ep, &msg, FI_INJECT | EFA_RDM_TXE_NO_COMPLETION |
return efa_rdm_generic_writemsg(efa_rdm_ep, peer, &msg, FI_INJECT | EFA_RDM_TXE_NO_COMPLETION |
FI_REMOTE_CQ_DATA);
}

Expand Down

0 comments on commit 1ead949

Please sign in to comment.