Skip to content

Commit

Permalink
core: Add new peer group feature
Browse files Browse the repository at this point in the history
Introduce the concept of peer groups.  A peer group is a set of
peers that are communicating together for some specific set of tasks.
Peer groups provide a lower-level mapping of HPC and AI communicators.

Signed-off-by: Sean Hefty <[email protected]>
Signed-off-by: Jianxin Xiong <[email protected]>
  • Loading branch information
shefty authored and j-xiong committed May 12, 2024
1 parent f0bb9c1 commit 9ed5df4
Show file tree
Hide file tree
Showing 5 changed files with 153 additions and 0 deletions.
1 change: 1 addition & 0 deletions include/rdma/fabric.h
Original file line number Diff line number Diff line change
Expand Up @@ -442,6 +442,7 @@ struct fi_domain_attr {
size_t mr_cnt;
uint32_t tclass;
size_t max_ep_auth_key;
uint32_t max_group_id;
};

struct fi_fabric_attr {
Expand Down
6 changes: 6 additions & 0 deletions include/rdma/fi_domain.h
Original file line number Diff line number Diff line change
Expand Up @@ -551,6 +551,12 @@ fi_rx_addr(fi_addr_t fi_addr, int rx_index, int rx_ctx_bits)
return (fi_addr_t) (((uint64_t) rx_index << (64 - rx_ctx_bits)) | fi_addr);
}

/*
 * Build an fi_addr_t that carries a peer group id.
 *
 * group_id is widened to 64 bits, moved into bits 32..63, and OR'ed with
 * fi_addr. Nothing is masked, so any bits already set in the upper half
 * of fi_addr remain set in the result.
 */
static inline fi_addr_t
fi_group_addr(fi_addr_t fi_addr, uint32_t group_id)
{
	uint64_t group_bits = (uint64_t) group_id;

	group_bits <<= 32;
	return (fi_addr_t) (group_bits | fi_addr);
}

#endif

#ifdef __cplusplus
Expand Down
29 changes: 29 additions & 0 deletions man/fi_av.3.md
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@ int fi_av_lookup(struct fid_av *av, fi_addr_t fi_addr,
fi_addr_t fi_rx_addr(fi_addr_t fi_addr, int rx_index,
int rx_ctx_bits);

fi_addr_t fi_group_addr(fi_addr_t fi_addr, uint32_t group_id);

const char * fi_av_straddr(struct fid_av *av, const void *addr,
char *buf, size_t *len);

Expand Down Expand Up @@ -590,6 +592,33 @@ transfer operations.
For address vectors opened with FI_AV_USER_ID, fi_av_set_user_id is used
to define the user-specified fi_addr_t.

# PEER GROUPS

Peer groups provide a direct mapping to HPC and AI communicator constructs.

The addresses in an AV represent the full set of peers that a local process
may communicate with. A peer group conceptually represents a subset of
those peers. A peer group may be used to identify peers working on a common
task, which need their communication logically separated from other traffic.
Peer groups are not a security mechanism, but instead help separate data.
A given peer may belong to 0 or more peer groups,
with no limit placed on how many peers can belong to a single peer group.

Peer groups are identified using an integer value, known as a group id.
Group ids are selected by the user and conveyed as part of an fi_addr_t
value. The management of a group id and its relationship to addresses
inserted into an AV is directly controlled by the user. When enabled,
sent messages are marked as belonging to a specific peer group, and posted
receive buffers must have a matching group id to receive the data.

Users are responsible for selecting a valid peer group id, subject to the
limitation negotiated using the domain attribute max_group_id. The group
id of an fi_addr_t may be set using the fi_group_addr() function.

## fi_group_addr

This function is used to set the group ID portion of an fi_addr_t.

# RETURN VALUES

Insertion calls, excluding `fi_av_insert_auth_key`, will return the number
Expand Down
10 changes: 10 additions & 0 deletions man/fi_domain.3.md
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,7 @@ struct fi_domain_attr {
size_t mr_cnt;
uint32_t tclass;
size_t max_ep_auth_key;
uint32_t max_group_id;
};
```

Expand Down Expand Up @@ -747,6 +748,15 @@ for additional information.
: The maximum number of authorization keys which can be supported per connectionless
endpoint.

## Maximum Peer Group Id (max_group_id)

The maximum value that a peer group may be assigned, inclusive. Valid peer
group ids must be between 0 and max_group_id. See [`fi_av`(3)](fi_av.3.html)
for additional information on peer groups and their use. Users may request
support for peer groups by setting this to a non-zero value. Providers that
cannot meet the requested max_group_id will fail fi_getinfo(). On output,
providers may return a value higher than that requested by the application.

# RETURN VALUE

Returns 0 on success. On error, a negative value corresponding to fabric
Expand Down
107 changes: 107 additions & 0 deletions src/abi_1_0.c
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,61 @@ struct fi_info_1_3 {
struct fid_nic_1_3 *nic;
};

/*
 * Domain attribute layout referenced by struct fi_info_1_7 (the structure
 * used by the FABRIC_1.7 compat entry points in this file).
 *
 * The member list ends at max_ep_auth_key; it does not contain the
 * max_group_id member that follows max_ep_auth_key in the current
 * struct fi_domain_attr.
 *
 * NOTE(review): presumably a frozen ABI snapshot -- member order and types
 * should not be changed; confirm before editing.
 */
struct fi_domain_attr_1_7 {
struct fid_domain *domain;
char *name;
enum fi_threading threading;
enum fi_progress control_progress;
enum fi_progress data_progress;
enum fi_resource_mgmt resource_mgmt;
enum fi_av_type av_type;
int mr_mode;
size_t mr_key_size;
size_t cq_data_size;
size_t cq_cnt;
size_t ep_cnt;
size_t tx_ctx_cnt;
size_t rx_ctx_cnt;
size_t max_ep_tx_ctx;
size_t max_ep_rx_ctx;
size_t max_ep_stx_ctx;
size_t max_ep_srx_ctx;
size_t cntr_cnt;
size_t mr_iov_limit;
uint64_t caps;
uint64_t mode;
uint8_t *auth_key;
size_t auth_key_size;
size_t max_err_data;
size_t mr_cnt;
uint32_t tclass;
/* Last member of the 1.7 layout. */
size_t max_ep_auth_key;
};

/*
 * Only the domain attributes get a dedicated 1.7 structure in this file.
 * The remaining 1.7 attribute names are plain aliases for the 1.3
 * definitions.
 */
#define fi_tx_attr_1_7 fi_tx_attr_1_3
#define fi_rx_attr_1_7 fi_rx_attr_1_3
#define fi_ep_attr_1_7 fi_ep_attr_1_3
#define fi_fabric_attr_1_7 fi_fabric_attr_1_3
#define fid_nic_1_7 fid_nic_1_3

/*
 * Info structure taken and returned by the *_1_7 entry points
 * (fi_freeinfo_1_7, fi_dupinfo_1_7, fi_getinfo_1_7).
 *
 * domain_attr points at the dedicated struct fi_domain_attr_1_7; the other
 * attribute pointer types (tx/rx/ep/fabric/nic) are the 1.7 alias names,
 * which are #defined to their 1.3 counterparts.
 */
struct fi_info_1_7 {
/* Note: typed as the current struct fi_info, not struct fi_info_1_7. */
struct fi_info *next;
uint64_t caps;
uint64_t mode;
uint32_t addr_format;
size_t src_addrlen;
size_t dest_addrlen;
void *src_addr;
void *dest_addr;
fid_t handle;
struct fi_tx_attr_1_7 *tx_attr;
struct fi_rx_attr_1_7 *rx_attr;
struct fi_ep_attr_1_7 *ep_attr;
struct fi_domain_attr_1_7 *domain_attr;
struct fi_fabric_attr_1_7 *fabric_attr;
struct fid_nic_1_7 *nic;
};

#define ofi_dup_attr(dst, src) \
do { \
dst = calloc(1, sizeof(*dst)); \
Expand Down Expand Up @@ -621,3 +676,55 @@ int fi_getinfo_1_3(uint32_t version, const char *node, const char *service,
return ret;
}
COMPAT_SYMVER(fi_getinfo_1_3, fi_getinfo, FABRIC_1.3);

/*
* ABI 1.7
*/
/*
 * fi_freeinfo entry point registered under FABRIC_1.7.
 *
 * The 1.7 info pointer is reinterpreted as a struct fi_info and passed
 * straight to fi_freeinfo(); no conversion is performed here.
 */
__attribute__((visibility ("default"),EXTERNALLY_VISIBLE))
void fi_freeinfo_1_7(struct fi_info_1_7 *info)
{
	struct fi_info *current = (struct fi_info *) info;

	fi_freeinfo(current);
}
COMPAT_SYMVER(fi_freeinfo_1_7, fi_freeinfo, FABRIC_1.7);

/*
 * fi_dupinfo entry point registered under FABRIC_1.7.
 *
 * info: 1.7-layout info to duplicate, or NULL.
 *
 * When info is NULL, the result of ofi_allocinfo_internal() is returned.
 * Otherwise ofi_dup_info() produces an intermediate struct fi_info ("base")
 * from the 1.7 input, fi_dupinfo() duplicates that intermediate, and the
 * intermediate is released with ofi_free_info().
 * NOTE(review): ofi_dup_info/ofi_free_info are defined outside this view;
 * presumably they copy the 1.7 structures into current-size ones -- confirm.
 *
 * Returns the duplicate viewed as a struct fi_info_1_7, or NULL when either
 * the intermediate copy or fi_dupinfo() fails.
 *
 * The return type is struct fi_info_1_7 * so that it agrees with the casts
 * on both return statements and with the 1.7 parameter type.
 */
__attribute__((visibility ("default"),EXTERNALLY_VISIBLE))
struct fi_info_1_7 *fi_dupinfo_1_7(const struct fi_info_1_7 *info)
{
	struct fi_info *dup, *base;

	if (!info)
		return (struct fi_info_1_7 *) ofi_allocinfo_internal();

	ofi_dup_info(base, info);
	if (base == NULL)
		return NULL;

	dup = fi_dupinfo(base);

	/* base is only a staging copy; it is released whether or not dup is NULL. */
	ofi_free_info(base);
	return (struct fi_info_1_7 *) dup;
}
COMPAT_SYMVER(fi_dupinfo_1_7, fi_dupinfo, FABRIC_1.7);

/*
 * fi_getinfo entry point registered under FABRIC_1.7.
 *
 * version, node, service, flags: forwarded unchanged to fi_getinfo().
 * hints_1_7: optional 1.7-layout hints; when non-NULL they are duplicated
 *            through fi_dupinfo_1_7() and the duplicate is what fi_getinfo()
 *            receives.
 * info:      output list pointer, handed to fi_getinfo() as struct fi_info **.
 *
 * Returns -FI_ENOMEM if the hints could not be duplicated, otherwise the
 * value returned by fi_getinfo(). The duplicated hints are released with
 * fi_freeinfo() before returning.
 */
__attribute__((visibility ("default"),EXTERNALLY_VISIBLE))
int fi_getinfo_1_7(uint32_t version, const char *node, const char *service,
		   uint64_t flags, const struct fi_info_1_7 *hints_1_7,
		   struct fi_info_1_7 **info)
{
	struct fi_info *cur_hints = NULL;
	int rc;

	if (hints_1_7 != NULL) {
		cur_hints = (struct fi_info *) fi_dupinfo_1_7(hints_1_7);
		if (cur_hints == NULL)
			return -FI_ENOMEM;
	}

	rc = fi_getinfo(version, node, service, flags, cur_hints,
			(struct fi_info **) info);

	/* cur_hints is NULL here when the caller supplied no hints. */
	fi_freeinfo(cur_hints);
	return rc;
}
COMPAT_SYMVER(fi_getinfo_1_7, fi_getinfo, FABRIC_1.7);

0 comments on commit 9ed5df4

Please sign in to comment.