Skip to content

Commit

Permalink
core: Support multiple auth keys per EP
Browse files Browse the repository at this point in the history
fi_domain_attr::max_ep_auth_key_cnt is used to report the number of
authorization keys supported by an endpoint. If this value is non-zero,
connectionless endpoints must implement FI_AV_AUTH_KEY.

FI_AV_AUTH_KEY is set by libfabric users via fi_ep_attr::auth_key_size
to denote whether an endpoint will be bound to an address vector with
authorization keys inserted. When set, providers will ignore
fi_ep_attr::auth_key during endpoint enable.

All eligible authorization keys must be pre-inserted into the AV via
fi_av_insert_auth_key(). Acceptable flags are the following:
- FI_TRANSMIT: Restrict the authorization key to outbound data
  transfers. This includes send message, RMA, and atomic operations.
- FI_RECV: Restrict the authorization key to inbound data transfers.
  This includes received messages and target MRs of RMA and atomic
  operations.

fi_av_insert_auth_key() output is an fi_addr_t handle specific to this
authorization key. If the EP is configured with FI_DIRECTED_RECV, this
fi_addr_t can be used to match all EP addrs associated with this
authorization key. Calling fi_av_remove() with this fi_addr_t will
delete the authorization key. -FI_EBUSY will be returned from
fi_av_remove() should this key still be used by an EP. In other words,
all EPs using this authorization key need to be closed for
fi_av_remove() to succeed.

Once the AV is bound to an EP and the EP is successfully enabled, the
EP will be configured to support all auth keys in the AV at that point
in time.

Users must provide an authorization key fi_addr_t with
fi_av_insert_{addr, svc, sym}. This is done by using the fi_addr as
input. For fi_av_insert_{addr, sym}, since fi_addr may be an array,
only index 0 will be looked at for the authorization key fi_addr_t.
That is only a single authorization key fi_addr_t will be supported
for these functions. The output of fi_av_insert_{addr, svc, sym} is
an fi_addr_t mapping to a specific <EP addr, auth_key> tuple.

For FI_EADDRNOTAVAIL CQ errors, fi_cq_err_entry::src_addr will return
the authorization key handle associated with the incoming data transfer.
This, combined with the existing behavior of fi_cq_err_entry::err_data
 enables users to generate a fi_addr_t mapping to the specific
<EP addr, auth_key> tuple which triggered the FI_EADDRNOTAVAIL event.

Since these API changes require external headers to be changed, ABI is
updated to version 1.7. Existing providers and utility code are updated
to ensure ABI compatibility.

Signed-off-by: Ian Ziemba <[email protected]>
  • Loading branch information
iziemba committed Sep 15, 2023
1 parent 1daaa2c commit 3ffc31b
Show file tree
Hide file tree
Showing 18 changed files with 303 additions and 18 deletions.
16 changes: 15 additions & 1 deletion include/ofi_abi.h
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ extern "C" {
* name appended with the ABI version that it is compatible with.
*/

#define CURRENT_ABI "FABRIC_1.6"
#define CURRENT_ABI "FABRIC_1.7"

#if HAVE_ALIAS_ATTRIBUTE == 1
#define DEFAULT_SYMVER_PRE(a) a##_
Expand Down Expand Up @@ -164,6 +164,20 @@ struct fi_cq_err_entry_1_0 {
void *err_data;
};

/*
 * Snapshot of the struct fi_cq_err_entry layout as it was before the
 * trailing src_addr field was appended (the fields stop at err_data_size).
 *
 * Providers use sizeof(struct fi_cq_err_entry_1_1) to limit how many bytes
 * are copied into a caller's error-entry buffer when the fabric was opened
 * with an API version older than 1.20, so an application built against the
 * smaller structure is not written past the end of its buffer.
 *
 * NOTE(review): field order and types are presumably required to match the
 * leading fields of struct fi_cq_err_entry exactly - confirm before editing.
 */
struct fi_cq_err_entry_1_1 {
void *op_context;
uint64_t flags;
size_t len;
void *buf;
uint64_t data;
uint64_t tag;
size_t olen;
int err;
int prov_errno;
/* err_data is available until the next time the CQ is read */
void *err_data;
size_t err_data_size;
};

#ifdef __cplusplus
}
Expand Down
2 changes: 2 additions & 0 deletions include/rdma/fabric.h
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,7 @@ enum {
#define FI_ADDR_NOTAVAIL ((uint64_t) -1)
#define FI_KEY_NOTAVAIL ((uint64_t) -1)
#define FI_SHARED_CONTEXT SIZE_MAX
#define FI_AV_AUTH_KEY SIZE_MAX
typedef uint64_t fi_addr_t;

enum fi_av_type {
Expand Down Expand Up @@ -448,6 +449,7 @@ struct fi_domain_attr {
size_t max_err_data;
size_t mr_cnt;
uint32_t tclass;
size_t max_ep_auth_key_cnt;
};

struct fi_fabric_attr {
Expand Down
23 changes: 23 additions & 0 deletions include/rdma/fi_domain.h
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,11 @@ struct fi_ops_av {
char *buf, size_t *len);
int (*av_set)(struct fid_av *av, struct fi_av_set_attr *attr,
struct fid_av_set **av_set, void *context);
int (*insert_auth_key)(struct fid_av *av, const void *auth_key,
size_t auth_key_size, fi_addr_t *fi_addr,
uint64_t flags);
int (*lookup_auth_key)(struct fid_av *av, fi_addr_t fi_addr,
void *auth_key, size_t *auth_key_size);
};

struct fid_av {
Expand Down Expand Up @@ -523,6 +528,24 @@ fi_av_straddr(struct fid_av *av, const void *addr, char *buf, size_t *len)
return av->ops->straddr(av, addr, buf, len);
}

/*
 * Insert an authorization key into an address vector.
 *
 * av            - address vector receiving the key
 * auth_key      - buffer holding the authorization key
 * auth_key_size - size of auth_key in bytes
 * fi_addr       - output location for the handle produced by the provider
 * flags         - forwarded unchanged to the provider
 *
 * Returns the value of the provider's insert_auth_key op, or -FI_ENOSYS
 * when FI_CHECK_OP reports that the op is not available on this AV.
 */
static inline int
fi_av_insert_auth_key(struct fid_av *av, const void *auth_key,
		      size_t auth_key_size, fi_addr_t *fi_addr, uint64_t flags)
{
	/* Op may be absent from this provider's fi_ops_av table. */
	if (!FI_CHECK_OP(av->ops, struct fi_ops_av, insert_auth_key))
		return -FI_ENOSYS;

	return av->ops->insert_auth_key(av, auth_key, auth_key_size, fi_addr,
					flags);
}

/*
 * Retrieve the authorization key associated with an fi_addr_t.
 *
 * av            - address vector to query
 * addr          - fi_addr_t whose authorization key is requested
 * auth_key      - caller buffer that receives the key
 * auth_key_size - in/out size argument passed through to the provider
 *
 * Returns the value of the provider's lookup_auth_key op, or -FI_ENOSYS
 * when FI_CHECK_OP reports that the op is not available on this AV.
 */
static inline int
fi_av_lookup_auth_key(struct fid_av *av, fi_addr_t addr, void *auth_key,
		      size_t *auth_key_size)
{
	/* Op may be absent from this provider's fi_ops_av table. */
	if (!FI_CHECK_OP(av->ops, struct fi_ops_av, lookup_auth_key))
		return -FI_ENOSYS;

	return av->ops->lookup_auth_key(av, addr, auth_key, auth_key_size);
}

static inline fi_addr_t
fi_rx_addr(fi_addr_t fi_addr, int rx_index, int rx_ctx_bits)
{
Expand Down
1 change: 1 addition & 0 deletions include/rdma/fi_eq.h
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,7 @@ struct fi_cq_err_entry {
/* err_data is available until the next time the CQ is read */
void *err_data;
size_t err_data_size;
fi_addr_t src_addr;
};

enum fi_cq_wait_cond {
Expand Down
9 changes: 8 additions & 1 deletion libfabric.map.in
Original file line number Diff line number Diff line change
Expand Up @@ -52,4 +52,11 @@ FABRIC_1.5 {
FABRIC_1.6 {
global:
fi_log_ready;
} FABRIC_1.5;
} FABRIC_1.5;

FABRIC_1.7 {
global:
fi_getinfo;
fi_freeinfo;
fi_dupinfo;
} FABRIC_1.6;
8 changes: 8 additions & 0 deletions man/fabric.7.md
Original file line number Diff line number Diff line change
Expand Up @@ -389,6 +389,14 @@ call.

ABI version starting with libfabric 1.14. Added fi_log_ready for providers.

## ABI 1.7

ABI version starting with libfabric 1.20. Added new fields to the following
attributes:

*fi_domain_attr*
: Added max_ep_auth_key_cnt

# SEE ALSO

[`fi_info`(1)](fi_info.1.html),
Expand Down
80 changes: 80 additions & 0 deletions man/fi_av.3.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,12 @@ fi_av_lookup
fi_av_straddr
: Convert an address into a printable string.

fi_av_insert_auth_key
: Insert an authorization key into the address vector.

fi_av_lookup_auth_key
: Retrieve an authorization key stored in the address vector.

# SYNOPSIS

```c
Expand Down Expand Up @@ -58,6 +64,12 @@ fi_addr_t fi_rx_addr(fi_addr_t fi_addr, int rx_index,

const char * fi_av_straddr(struct fid_av *av, const void *addr,
char *buf, size_t *len);

int fi_av_insert_auth_key(struct fid_av *av, const void *auth_key,
size_t auth_key_size, fi_addr_t *fi_addr, uint64_t flags);

int fi_av_lookup_auth_key(struct fid_av *av, fi_addr_t addr,
void *auth_key, size_t *auth_key_size);
```
# ARGUMENTS
Expand Down Expand Up @@ -97,6 +109,14 @@ const char * fi_av_straddr(struct fid_av *av, const void *addr,
*flags*
: Additional flags to apply to the operation.
*auth_key*
: Buffer containing authorization key to be inserted into the address
vector.
*auth_key_size*
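: For fi_av_insert_auth_key, the size of the authorization key in bytes.
For fi_av_lookup_auth_key, on input specifies the size of the auth_key
buffer; on output, set to the size of the buffer needed to store the
authorization key.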
# DESCRIPTION
Address vectors are used to map higher-level addresses, which may be
Expand Down Expand Up @@ -328,6 +348,16 @@ that calls to fi_av_insert following a call to fi_av_remove always reference a
valid buffer in the fi_addr parameter. Otherwise it may be difficult to
determine what the next assigned index will be.

If the address vector is configured with authorization keys, the fi_addr
parameter cannot be NULL and is used to define the authorization key associated
with the address being inserted. Acceptable inputs are the fi_addr_t's generated
from `fi_av_insert_auth_key`. Only `fi_addr[0]` can be used to pass in an
authorization key fi_addr_t. The returned fi_addr_t's will map to the endpoint
address against the specified authorization key. These fi_addr_t's can be
used as the target for local data transfer operations. If the endpoint
supports `FI_DIRECTED_RECV`, these fi_addr_t's can be used to restrict
receive buffers to a specific endpoint address and authorization key.

*flags*
: The following flag may be passed to AV insertion calls: fi_av_insert,
fi_av_insertsvc, or fi_av_insertsym.
Expand Down Expand Up @@ -414,6 +444,11 @@ accessed. Inserted addresses are not required to be removed.
fi_av_close will automatically cleanup any resources associated with
addresses remaining in the AV when it is invoked.

If the address being removed came from `fi_av_insert_auth_key`, the address
will only be removed if all endpoints, which have been enabled against the
corresponding authorization key, have been closed. If all endpoints are not
closed, -FI_EBUSY will be returned.

Flags are reserved for future use and must be 0.

## fi_av_lookup
Expand Down Expand Up @@ -450,6 +485,51 @@ size of the buffer needed to store the address. This size may be
larger than the input len. If the provided buffer is too small, the
results will be truncated. fi_av_straddr returns a pointer to buf.

## fi_av_insert_auth_key

This function associates authorization keys with an address vector. This
behavior, combined with endpoints opened with `FI_AV_AUTH_KEY`, enables an
endpoint to support multiple authorization keys. When an endpoint with
`FI_AV_AUTH_KEY` is enabled, the endpoint is configured against
authorization keys inserted at that point in time. Later authorization
key insertions will not propagate to already enabled endpoints.

The `auth_key` and `auth_key_size` parameters are used to input the
authorization key into the address vector. The structure of the
authorization key is provider specific.

The output of `fi_av_insert_auth_key` is a fi_addr_t handle representing
all endpoint addresses against this specific authorization key. For
endpoints enabled with FI_DIRECTED_RECV, authorization key fi_addr_t's
inserted with `FI_RECV` can be used to restrict incoming messages to only
endpoint addresses within the authorization key.

These authorization key fi_addr_t's can later be used as input for
endpoint address insertion functions to generate an fi_addr_t for a
specific endpoint address and authorization key.

*flags*
: The following flags may be passed to `fi_av_insert_auth_key`:

- *FI_TRANSMIT*
: Restrict the authorization key to outbound data transfers. This includes
send message, RMA, and atomic operations.

- *FI_RECV*
: Restrict the authorization key to inbound data transfers. This includes
received messages and target MRs of RMA and atomic operations.

## fi_av_lookup_auth_key

This function returns the authorization key associated with a fi_addr_t.
Acceptable fi_addr_t inputs are the outputs of `fi_av_insert_auth_key` and
AV address insertion functions. The returned authorization key is in a
provider specific format. On input, the auth_key_size parameter should
indicate the size of the auth_key buffer. If the actual authorization key
is larger than what can fit into the buffer, it will be truncated. On
output, auth_key_size is set to the size of the buffer needed to store the
authorization key, which may be larger than the input value.

# NOTES

An AV should only store a single instance of an address.
Expand Down
6 changes: 6 additions & 0 deletions man/fi_cq.3.md
Original file line number Diff line number Diff line change
Expand Up @@ -414,6 +414,7 @@ struct fi_cq_err_entry {
int prov_errno; /* provider error code */
void *err_data; /* error data */
size_t err_data_size; /* size of err_data */
fi_addr_t src_addr; /* error source address */
};
```

Expand Down Expand Up @@ -447,6 +448,11 @@ Notable completion error codes are given below.
passed directly into an fi_av_insert call to add the source address
to the address vector.

For API versions 1.20 and later, if the EP is configured with
FI_AV_AUTH_KEY, src_addr will be set to the fi_addr_t authorization key
handle corresponding to the incoming data transfer. Otherwise, the
value will be set to FI_ADDR_UNSPEC.

## fi_cq_signal

The fi_cq_signal call will unblock any thread waiting in fi_cq_sread
Expand Down
7 changes: 7 additions & 0 deletions man/fi_domain.3.md
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,7 @@ struct fi_domain_attr {
size_t max_err_data;
size_t mr_cnt;
uint32_t tclass;
size_t max_ep_auth_key_cnt;
};
```

Expand Down Expand Up @@ -772,6 +773,12 @@ This specifies the default traffic class that will be associated any endpoints
created within the domain. See [`fi_endpoint`(3)](fi_endpoint.3.html)
for additional information.

## Max Authorization Keys per Endpoint (max_ep_auth_key_cnt)

: The maximum number of authorization keys which can be supported per endpoint. If
connectionless endpoints are supported and this value is non-zero, providers must
support FI_AV_AUTH_KEY.

# RETURN VALUE

Returns 0 on success. On error, a negative value corresponding to fabric
Expand Down
5 changes: 5 additions & 0 deletions man/fi_endpoint.3.md
Original file line number Diff line number Diff line change
Expand Up @@ -911,6 +911,11 @@ The length of the authorization key in bytes. This field will be 0 if
authorization keys are not available or used. This field is ignored
unless the fabric is opened with API version 1.5 or greater.
If the size is set to the value FI_AV_AUTH_KEY, the endpoint will be
configured to use authorization keys associated with the AV. Providers
which support authorization keys and connectionless endpoints must support
this option.
## auth_key - Authorization Key
If supported by the fabric, an authorization key (a.k.a. job
Expand Down
7 changes: 5 additions & 2 deletions prov/bgq/src/fi_bgq_cq.c
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,9 @@ static ssize_t
fi_bgq_cq_readerr(struct fid_cq *cq, struct fi_cq_err_entry *buf, uint64_t flags)
{
struct fi_bgq_cq *bgq_cq = container_of(cq, struct fi_bgq_cq, cq_fid);
uint32_t api_version = bgq_cq->domain->fabric->fabric_fid.api_version;
size_t size = FI_VERSION_GE(api_version, FI_VERSION(1, 20)) ?
sizeof(*buf) : sizeof(struct fi_cq_err_entry_1_1);

if (FI_BGQ_FABRIC_DIRECT_PROGRESS == FI_PROGRESS_MANUAL) {

Expand Down Expand Up @@ -186,7 +189,7 @@ fi_bgq_cq_readerr(struct fid_cq *cq, struct fi_cq_err_entry *buf, uint64_t flags
if (NULL == bgq_cq->err_head)
bgq_cq->err_tail = NULL;

*buf = ext->err_entry;
memcpy(buf, &ext->err_entry, size);
free(ext);

ret = fi_bgq_unlock_if_required(&bgq_cq->lock, lock_required);
Expand All @@ -213,7 +216,7 @@ fi_bgq_cq_readerr(struct fid_cq *cq, struct fi_cq_err_entry *buf, uint64_t flags

assert(ext->bgq_context.flags & FI_BGQ_CQ_CONTEXT_EXT); /* DEBUG */

*buf = ext->err_entry;
memcpy(buf, &ext->err_entry, size);
free(ext);

l2atomic_fifo_advance(&bgq_cq->err_consumer);
Expand Down
5 changes: 4 additions & 1 deletion prov/opx/include/rdma/opx/fi_opx_cq_ops_table.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,9 @@ fi_opx_cq_readerr(struct fid_cq *cq, struct fi_cq_err_entry *buf, uint64_t flags
FI_DBG_TRACE(fi_opx_global.prov, FI_LOG_CQ, "(begin)\n");

struct fi_opx_cq *opx_cq = container_of(cq, struct fi_opx_cq, cq_fid);
uint32_t api_version = opx_cq->domain->fabric->fabric_fid.api_version;
size_t size = FI_VERSION_GE(api_version, FI_VERSION(1, 20)) ?
sizeof(*buf) : sizeof(struct fi_cq_err_entry_1_1);

if (IS_PROGRESS_MANUAL(opx_cq->domain)) {

Expand All @@ -72,7 +75,7 @@ fi_opx_cq_readerr(struct fid_cq *cq, struct fi_cq_err_entry *buf, uint64_t flags
const int lock_required = fi_opx_threading_lock_required(threading, fi_opx_global.progress);

fi_opx_lock_if_required(&opx_cq->lock, lock_required);
*buf = ext->err_entry;
memcpy(buf, &ext->err_entry, size);
slist_remove_head((struct slist *)&opx_cq->err);
free(ext);
ext = NULL;
Expand Down
9 changes: 7 additions & 2 deletions prov/psm2/src/psmx2_cq.c
Original file line number Diff line number Diff line change
Expand Up @@ -1693,8 +1693,13 @@ STATIC ssize_t psmx2_cq_readerr(struct fid_cq *cq, struct fi_cq_err_entry *buf,
if (cq_priv->pending_error) {
api_version = cq_priv->domain->fabric->util_fabric.
fabric_fid.api_version;
size = FI_VERSION_GE(api_version, FI_VERSION(1, 5)) ?
sizeof(*buf) : sizeof(struct fi_cq_err_entry_1_0);

if (FI_VERSION_GE(api_version, FI_VERSION(1, 20)))
size = sizeof(*buf);
else if (FI_VERSION_GE(api_version, FI_VERSION(1, 5)))
size = sizeof(struct fi_cq_err_entry_1_1);
else
size = sizeof(struct fi_cq_err_entry_1_0);

memcpy(buf, &cq_priv->pending_error->cqe, size);
free(cq_priv->pending_error);
Expand Down
9 changes: 7 additions & 2 deletions prov/psm3/src/psmx3_cq.c
Original file line number Diff line number Diff line change
Expand Up @@ -963,8 +963,13 @@ STATIC ssize_t psmx3_cq_readerr(struct fid_cq *cq, struct fi_cq_err_entry *buf,
if (cq_priv->pending_error) {
api_version = cq_priv->domain->fabric->util_fabric.
fabric_fid.api_version;
size = FI_VERSION_GE(api_version, FI_VERSION(1, 5)) ?
sizeof(*buf) : sizeof(struct fi_cq_err_entry_1_0);

if (FI_VERSION_GE(api_version, FI_VERSION(1, 20)))
size = sizeof(*buf);
else if (FI_VERSION_GE(api_version, FI_VERSION(1, 5)))
size = sizeof(struct fi_cq_err_entry_1_1);
else
size = sizeof(struct fi_cq_err_entry_1_0);

memcpy(buf, &cq_priv->pending_error->cqe, size);
free(cq_priv->pending_error);
Expand Down
7 changes: 6 additions & 1 deletion prov/sockets/src/sock_cq.c
Original file line number Diff line number Diff line change
Expand Up @@ -435,6 +435,7 @@ static ssize_t sock_cq_readerr(struct fid_cq *cq, struct fi_cq_err_entry *buf,
uint32_t api_version;
size_t err_data_size = 0;
void *err_data = NULL;
size_t size;

sock_cq = container_of(cq, struct sock_cq, cq_fid);
if (sock_cq->domain->progress_mode == FI_PROGRESS_MANUAL)
Expand All @@ -449,7 +450,11 @@ static ssize_t sock_cq_readerr(struct fid_cq *cq, struct fi_cq_err_entry *buf,
&& buf->err_data && buf->err_data_size) {
err_data = buf->err_data;
err_data_size = buf->err_data_size;
*buf = entry;

size = FI_VERSION_GE(api_version, FI_VERSION(1, 20)) ?
sizeof(*buf) : sizeof(struct fi_cq_err_entry_1_1);
memcpy(buf, &entry, size);

buf->err_data = err_data;

/* Fill provided user's buffer */
Expand Down
Loading

0 comments on commit 3ffc31b

Please sign in to comment.