Skip to content

Commit

Permalink
util: Update util CQ err_data handling
Browse files Browse the repository at this point in the history
The current version of util CQ err_data handling does not properly
handle if the user err_data_size is zero. Fix this by setting err_data
to a CQ owned err_data buffer.

Signed-off-by: Ian Ziemba <[email protected]>
  • Loading branch information
iziemba authored and j-xiong committed Oct 27, 2023
1 parent 2942f2a commit 2786c8d
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 0 deletions.
11 changes: 11 additions & 0 deletions include/ofi_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -514,6 +514,11 @@ struct util_cq {

struct fid_peer_cq *peer_cq;

/* Error data buffer used to support API version 1.5 and if the user
* provider err_data_size is zero.
*/
void *err_data;

/* Only valid if not FI_PEER */
struct util_comp_cirq *cirq;
fi_addr_t *src;
Expand Down Expand Up @@ -556,6 +561,12 @@ ssize_t ofi_cq_read_entries(struct util_cq *cq, void *buf, size_t count,
ssize_t i;

ofi_genlock_lock(&cq->cq_lock);

if (cq->err_data) {
free(cq->err_data);
cq->err_data = NULL;
}

if (ofi_cirque_isempty(cq->cirq)) {
i = -FI_EAGAIN;
goto out;
Expand Down
29 changes: 29 additions & 0 deletions prov/util/src/util_cq.c
Original file line number Diff line number Diff line change
Expand Up @@ -284,11 +284,18 @@ ssize_t ofi_cq_readerr(struct fid_cq *cq_fid, struct fi_cq_err_entry *buf,
struct util_cq *cq;
uint32_t api_version;
ssize_t ret;
size_t err_data_size = buf->err_data_size;

cq = container_of(cq_fid, struct util_cq, cq_fid);
api_version = cq->domain->fabric->fabric_fid.api_version;

ofi_genlock_lock(&cq->cq_lock);

if (cq->err_data) {
free(cq->err_data);
cq->err_data = NULL;
}

if (ofi_cirque_isempty(cq->cirq) ||
!(ofi_cirque_head(cq->cirq)->flags & UTIL_FLAG_AUX)) {
ret = -FI_EAGAIN;
Expand All @@ -307,6 +314,22 @@ ssize_t ofi_cq_readerr(struct fid_cq *cq_fid, struct fi_cq_err_entry *buf,

ofi_cq_err_memcpy(api_version, buf, &aux_entry->comp);

/* For compatibility purposes, if err_data_size is 0 on input,
* output err_data will be set to a data buffer owned by the provider.
*/
if (aux_entry->comp.err_data_size &&
(err_data_size == 0 || FI_VERSION_LT(api_version, FI_VERSION(1, 5)))) {
cq->err_data = mem_dup(aux_entry->comp.err_data,
aux_entry->comp.err_data_size);
if (!cq->err_data) {
ret = -FI_ENOMEM;
goto unlock;
}

buf->err_data = cq->err_data;
buf->err_data_size = aux_entry->comp.err_data_size;
}

slist_remove_head(&cq->aux_queue);
if (aux_entry->comp.err_data_size)
free(aux_entry->comp.err_data);
Expand Down Expand Up @@ -420,6 +443,11 @@ int ofi_cq_cleanup(struct util_cq *cq)
fi_close(&cq->wait->wait_fid.fid);
}

if (cq->err_data) {
free(cq->err_data);
cq->err_data = NULL;
}

ofi_genlock_destroy(&cq->cq_lock);
ofi_genlock_destroy(&cq->ep_list_lock);
ofi_atomic_dec32(&cq->domain->ref);
Expand Down Expand Up @@ -691,6 +719,7 @@ int ofi_cq_init(const struct fi_provider *prov, struct fid_domain *domain,
cq->cq_fid.fid.ops = &util_cq_fi_ops;
cq->cq_fid.ops = &util_cq_ops;
cq->progress = progress;
cq->err_data = NULL;

cq->domain = container_of(domain, struct util_domain, domain_fid);
ofi_atomic_initialize32(&cq->ref, 0);
Expand Down

0 comments on commit 2786c8d

Please sign in to comment.