From a0b54baa7065e3a6e7100b6c19f0f062ce49a323 Mon Sep 17 00:00:00 2001 From: Tomasz Gromadzki Date: Mon, 28 Nov 2022 09:33:43 +0100 Subject: [PATCH 01/10] prov/coll: Prepare coll_cq to be used in by collective providers coll_cq implementation can be reused by other collective providers. Signed-off-by: Tomasz Gromadzki --- include/ofi_coll.h | 4 ++++ prov/coll/src/coll.h | 1 + prov/coll/src/coll_cq.c | 20 +++++++++++++++++--- 3 files changed, 22 insertions(+), 3 deletions(-) diff --git a/include/ofi_coll.h b/include/ofi_coll.h index d6532f0dd7d..f5f8d103f86 100644 --- a/include/ofi_coll.h +++ b/include/ofi_coll.h @@ -162,4 +162,8 @@ struct util_coll_operation { uint64_t flags; }; +int coll_cq_init(struct fid_domain *domain, struct fi_cq_attr *attr, + struct fid_cq **cq_fid, ofi_cq_progress_func progress, + void *context); + #endif // _OFI_COLL_H_ diff --git a/prov/coll/src/coll.h b/prov/coll/src/coll.h index 1fcf6db1e25..d413f5963cb 100644 --- a/prov/coll/src/coll.h +++ b/prov/coll/src/coll.h @@ -60,6 +60,7 @@ #include #include #include +#include #define COLL_IOV_LIMIT 4 #define COLL_MR_MODES (OFI_MR_BASIC_MAP | FI_MR_LOCAL) diff --git a/prov/coll/src/coll_cq.c b/prov/coll/src/coll_cq.c index 3c279b5113e..cadf5783cf9 100644 --- a/prov/coll/src/coll_cq.c +++ b/prov/coll/src/coll_cq.c @@ -68,18 +68,32 @@ static struct fi_ops_cq coll_cq_ops = { int coll_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr, struct fid_cq **cq_fid, void *context) +{ + return coll_cq_init(domain, attr, cq_fid, &ofi_cq_progress, context); +} + +int coll_cq_init(struct fid_domain *domain, + struct fi_cq_attr *attr, struct fid_cq **cq_fid, + ofi_cq_progress_func progress, void *context) { struct coll_cq *cq; struct fi_peer_cq_context *peer_context = context; int ret; + const struct coll_domain *coll_domain; + const struct fi_provider* provider; + + coll_domain = container_of(domain, struct coll_domain, util_domain.domain_fid.fid); + provider = coll_domain->util_domain.fabric->prov; + + 
if (!attr || !(attr->flags & FI_PEER)) { - FI_WARN(&coll_prov, FI_LOG_CORE, "FI_PEER flag required\n"); + FI_WARN(provider, FI_LOG_CORE, "FI_PEER flag required\n"); return -EINVAL; } if (!peer_context || peer_context->size < sizeof(*peer_context)) { - FI_WARN(&coll_prov, FI_LOG_CORE, "invalid peer CQ context\n"); + FI_WARN(provider, FI_LOG_CORE, "invalid peer CQ context\n"); return -EINVAL; } @@ -89,7 +103,7 @@ int coll_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr, cq->peer_cq = peer_context->cq; - ret = ofi_cq_init(&coll_prov, domain, attr, &cq->util_cq, &ofi_cq_progress, + ret = ofi_cq_init(provider, domain, attr, &cq->util_cq, &ofi_cq_progress, context); if (ret) goto err; From c2f60858eea20ab127daabdbfffc58f3f1445e1a Mon Sep 17 00:00:00 2001 From: Tomasz Gromadzki Date: Mon, 28 Nov 2022 22:17:45 +0100 Subject: [PATCH 02/10] prov/rxm - collective offload capabilities reported if offload provider is available Signed-off-by: Tomasz Gromadzki --- prov/rxm/src/rxm_domain.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/prov/rxm/src/rxm_domain.c b/prov/rxm/src/rxm_domain.c index dda28e23da9..8fe324f24c7 100644 --- a/prov/rxm/src/rxm_domain.c +++ b/prov/rxm/src/rxm_domain.c @@ -319,6 +319,9 @@ static int rxm_query_collective(struct fid_domain *domain, if (!rxm_domain->util_coll_domain) return -FI_ENOSYS; + if (rxm_domain->offload_coll_domain) + return fi_query_collective(rxm_domain->offload_coll_domain, + coll, attr, flags); return fi_query_collective(rxm_domain->util_coll_domain, coll, attr, flags); @@ -890,6 +893,7 @@ int rxm_domain_open(struct fid_fabric *fabric, struct fi_info *info, FI_PEER, &peer_context); if (ret) goto err5; + rxm_domain->offload_coll_mask = 1; } } From 84c6e31e0281b512c3584a83211b446f2876b8b7 Mon Sep 17 00:00:00 2001 From: Tomasz Gromadzki Date: Mon, 28 Nov 2022 22:22:03 +0100 Subject: [PATCH 03/10] prov/rxm: Collective offload fabric setup integrated into rxm fabric initialization It is rxm provider responsability to 
initialize collective offload provider's fabric. Otherwise collective offload functionality will not be available Signed-off-by: Tomasz Gromadzki --- prov/rxm/src/rxm_fabric.c | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/prov/rxm/src/rxm_fabric.c b/prov/rxm/src/rxm_fabric.c index e07fd79813e..8c78d3c2191 100644 --- a/prov/rxm/src/rxm_fabric.c +++ b/prov/rxm/src/rxm_fabric.c @@ -126,7 +126,41 @@ static int rxm_fabric_init_offload_coll(struct rxm_fabric *fabric) * silimar to rxm_fabric_init_util_coll, except that the offload * provider is discovered by feature instead of name. */ + struct fi_info *hints, *offload_coll_info; + struct fid_fabric *offload_coll_fabric; + int ret; + + hints = fi_allocinfo(); + if (!hints) + return -FI_ENOMEM; + + hints->fabric_attr->prov_name = strdup(OFI_OFFLOAD_PREFIX "sharp"); // XXX to be fixed + // provider is discovered + // by feature + if (!hints->fabric_attr->prov_name) { + fi_freeinfo(hints); + return -FI_ENOMEM; + } + + hints->mode = FI_PEER_TRANSFER; + ret = fi_getinfo(OFI_VERSION_LATEST, NULL, NULL, OFI_OFFLOAD_PROV_ONLY, + hints, &offload_coll_info); + fi_freeinfo(hints); + + if (ret) + return ret; + + ret = fi_fabric(offload_coll_info->fabric_attr, &offload_coll_fabric, NULL); + if (ret) + goto err; + + fabric->offload_coll_info = offload_coll_info; + fabric->offload_coll_fabric = offload_coll_fabric; return 0; + +err: + fi_freeinfo(offload_coll_info); + return ret; } int rxm_fabric(struct fi_fabric_attr *attr, struct fid_fabric **fabric, From f518e046ec3b5edbc9a85dab6017fa54f0124399 Mon Sep 17 00:00:00 2001 From: Tomasz Gromadzki Date: Tue, 29 Nov 2022 09:21:01 +0100 Subject: [PATCH 04/10] core/fabric: Collective offload provider set via FI_OFFLOAD_COLL_PROVIDER FI_OFFLOAD_PROVIDER environment variable shall be set to offload provider name to instruct libcabric to setup and use particular provider. 
Signed-off-by: Tomasz Gromadzki --- prov/coll/src/coll_cq.c | 4 +--- prov/rxm/src/rxm_fabric.c | 16 ++++++++-------- src/fabric.c | 7 +++++++ 3 files changed, 16 insertions(+), 11 deletions(-) diff --git a/prov/coll/src/coll_cq.c b/prov/coll/src/coll_cq.c index cadf5783cf9..f8058992f62 100644 --- a/prov/coll/src/coll_cq.c +++ b/prov/coll/src/coll_cq.c @@ -86,7 +86,6 @@ int coll_cq_init(struct fid_domain *domain, coll_domain = container_of(domain, struct coll_domain, util_domain.domain_fid.fid); provider = coll_domain->util_domain.fabric->prov; - if (!attr || !(attr->flags & FI_PEER)) { FI_WARN(provider, FI_LOG_CORE, "FI_PEER flag required\n"); return -EINVAL; @@ -103,8 +102,7 @@ int coll_cq_init(struct fid_domain *domain, cq->peer_cq = peer_context->cq; - ret = ofi_cq_init(provider, domain, attr, &cq->util_cq, &ofi_cq_progress, - context); + ret = ofi_cq_init(provider, domain, attr, &cq->util_cq, progress, context); if (ret) goto err; diff --git a/prov/rxm/src/rxm_fabric.c b/prov/rxm/src/rxm_fabric.c index 8c78d3c2191..bb56df6df88 100644 --- a/prov/rxm/src/rxm_fabric.c +++ b/prov/rxm/src/rxm_fabric.c @@ -128,20 +128,20 @@ static int rxm_fabric_init_offload_coll(struct rxm_fabric *fabric) */ struct fi_info *hints, *offload_coll_info; struct fid_fabric *offload_coll_fabric; + char *offload_coll_name; int ret; + fi_param_get_str(NULL, "offload_coll_provider", &offload_coll_name); + + if (!strlen(offload_coll_name)) { + return 0; + } + hints = fi_allocinfo(); if (!hints) return -FI_ENOMEM; - hints->fabric_attr->prov_name = strdup(OFI_OFFLOAD_PREFIX "sharp"); // XXX to be fixed - // provider is discovered - // by feature - if (!hints->fabric_attr->prov_name) { - fi_freeinfo(hints); - return -FI_ENOMEM; - } - + hints->fabric_attr->prov_name = strdup(offload_coll_name); hints->mode = FI_PEER_TRANSFER; ret = fi_getinfo(OFI_VERSION_LATEST, NULL, NULL, OFI_OFFLOAD_PROV_ONLY, hints, &offload_coll_info); diff --git a/src/fabric.c b/src/fabric.c index 1d9be3f4311..046e4206d81 
100644 --- a/src/fabric.c +++ b/src/fabric.c @@ -3,6 +3,7 @@ * Copyright (c) 2006-2016 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013-2017 Intel Corp., Inc. All rights reserved. * (C) Copyright 2020 Hewlett Packard Enterprise Development LP + * Copyright (c) 2022 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -662,6 +663,8 @@ static void ofi_find_prov_libs(void) if (ofi_has_util_prefix(prov->prov_name)) { short_prov_name = prov->prov_name + strlen(OFI_UTIL_PREFIX); + } else if (ofi_has_offload_prefix(prov->prov_name)) { + short_prov_name = prov->prov_name + strlen(OFI_OFFLOAD_PREFIX); } else { short_prov_name = prov->prov_name; } @@ -834,6 +837,10 @@ void fi_ini(void) "(default: false)"); fi_param_get_bool(NULL, "av_remove_cleanup", &ofi_av_remove_cleanup); + fi_param_define(NULL, "offload_coll_provider", FI_PARAM_STRING, + "The name of colective offload provider (default: empty - no provider)"); + + ofi_load_dl_prov(); ofi_register_provider(PSM3_INIT, NULL); From d6867580c42e0d1955f3602bc19749818c7bbb5d Mon Sep 17 00:00:00 2001 From: Tomasz Gromadzki Date: Tue, 29 Nov 2022 16:51:29 +0100 Subject: [PATCH 05/10] prov/rxm: Create eq for collective offload provider Peer provider must create peer_eq for offload provider, to allow offload provider reporting events to peer provider. 
Signed-off-by: Tomasz Gromadzki --- prov/rxm/src/rxm_eq.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/prov/rxm/src/rxm_eq.c b/prov/rxm/src/rxm_eq.c index 039ddf79e0a..25e5c0e41f2 100644 --- a/prov/rxm/src/rxm_eq.c +++ b/prov/rxm/src/rxm_eq.c @@ -97,6 +97,13 @@ int rxm_eq_open(struct fid_fabric *fabric_fid, struct fi_eq_attr *attr, goto err2; } + if (rxm_fabric->offload_coll_fabric) { + ret = fi_eq_open(rxm_fabric->offload_coll_fabric, &peer_attr, + &rxm_eq->offload_coll_eq, &peer_context); + if (ret) + goto err2; + } + rxm_eq->util_eq.eq_fid.fid.ops = &rxm_eq_fi_ops; *eq_fid = &rxm_eq->util_eq.eq_fid; return 0; From 875ea6b47d5c36c92bfa71761013f7f10b0aff0e Mon Sep 17 00:00:00 2001 From: Tomasz Gromadzki Date: Wed, 30 Nov 2022 15:47:49 +0100 Subject: [PATCH 06/10] prov/rxm: FI_PEER flag force fi_join() with util_coll_ep Offload provider may execute collective operations via util_coll provider. It must call fi_join() operation to get struct mc required for collective operations. It can only call fi_join() on it's peer provider (e.g. rxm). 
FI_PEER flag is used to inform peer provider to coll fi_join() operation for util_coll_ep Signed-off-by: Tomasz Gromadzki --- prov/rxm/src/rxm_ep.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/prov/rxm/src/rxm_ep.c b/prov/rxm/src/rxm_ep.c index 7f60a124c15..8d4262dbd24 100644 --- a/prov/rxm/src/rxm_ep.c +++ b/prov/rxm/src/rxm_ep.c @@ -390,7 +390,22 @@ static int rxm_join_coll(struct fid_ep *ep, const void *addr, uint64_t flags, rxm_ep = container_of(ep, struct rxm_ep, util_ep.ep_fid); - return fi_join(rxm_ep->util_coll_ep, addr, flags, mc, context); + //FI_PEER flag is used to force util_coll context + //where fi_join() is called from offload provider + if (flags & FI_PEER) + return fi_join(rxm_ep->util_coll_ep, addr, flags, mc, context); + if (rxm_ep->offload_coll_ep) + return fi_join(rxm_ep->offload_coll_ep, addr, flags, mc, context); +#if 0 + if (ret) + goto err_util_coll; + // It is collective offload provider responsibility to store util_coll provider mc + ret = (*mc)->fid.ops->bind(&((*mc)->fid), &(util_mc->fid), 0); + if (ret) + goto err_off_coll; +#endif + else + return fi_join(rxm_ep->util_coll_ep, addr, flags, mc, context); } static struct fi_ops_cm rxm_ops_cm = { From e1dd9cf6e7b7c76bb426cea8f2a3ecc3da65ef53 Mon Sep 17 00:00:00 2001 From: Tomasz Gromadzki Date: Mon, 5 Dec 2022 13:13:57 +0100 Subject: [PATCH 07/10] prov/rxm: Set offload_coll_mask based on actual configuration offload_coll_mask value is calculated based on the actual offload capabilities confirmed by fi_query_collective(). 
Signed-off-by: Tomasz Gromadzki --- prov/rxm/src/rxm_domain.c | 15 ++++++++++++++- prov/rxm/src/rxm_ep.c | 1 + 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/prov/rxm/src/rxm_domain.c b/prov/rxm/src/rxm_domain.c index 8fe324f24c7..84042899946 100644 --- a/prov/rxm/src/rxm_domain.c +++ b/prov/rxm/src/rxm_domain.c @@ -844,6 +844,8 @@ int rxm_domain_open(struct fid_fabric *fabric, struct fi_info *info, struct rxm_fabric *rxm_fabric; struct fi_info *msg_info, *base_info; struct fi_peer_domain_context peer_context; + struct fi_collective_attr attr; + int ret; rxm_domain = calloc(1, sizeof(*rxm_domain)); @@ -893,7 +895,18 @@ int rxm_domain_open(struct fid_fabric *fabric, struct fi_info *info, FI_PEER, &peer_context); if (ret) goto err5; - rxm_domain->offload_coll_mask = 1; + + attr.op = FI_MIN; + attr.datatype = FI_INT8; + attr.datatype_attr.count =1; + attr.datatype_attr.size =1; + attr.mode = 0; + for (int i = FI_BARRIER; i < FI_GATHER; i++) { + ret = fi_query_collective(rxm_domain->offload_coll_domain, + i, &attr, 0); + if (FI_SUCCESS == ret) + rxm_domain->offload_coll_mask |= BIT(i); + } } } diff --git a/prov/rxm/src/rxm_ep.c b/prov/rxm/src/rxm_ep.c index 8d4262dbd24..ae3031419eb 100644 --- a/prov/rxm/src/rxm_ep.c +++ b/prov/rxm/src/rxm_ep.c @@ -1885,6 +1885,7 @@ int rxm_endpoint(struct fid_domain *domain, struct fi_info *info, peer_context.peer_ops = NULL; if (rxm_domain->offload_coll_mask) { + rxm_fabric->offload_coll_info->mode |= FI_PEER_TRANSFER; ret = fi_endpoint(rxm_domain->offload_coll_domain, rxm_fabric->offload_coll_info, &rxm_ep->offload_coll_ep, From d288bc663b1ea8279eec5281112591bf00595a0b Mon Sep 17 00:00:00 2001 From: Tomasz Gromadzki Date: Fri, 25 Nov 2022 13:15:59 +0100 Subject: [PATCH 08/10] prov/off_s - SHARP provider scaffolding Signed-off-by: Tomasz Gromadzki --- .travis.yml | 1 + Makefile.am | 2 + README.md | 9 + configure.ac | 1 + include/ofi_prov.h | 11 + include/ofi_sharp.h | 70 +++ man/fi_sharp.7.md | 65 +++ 
man/man7/fi_sharp.7 | 69 +++ prov/sharp/Makefile.include | 30 ++ prov/sharp/configure.m4 | 15 + prov/sharp/src/sharp.h | 198 ++++++++ prov/sharp/src/sharp_attr.c | 104 +++++ prov/sharp/src/sharp_coll.c | 799 ++++++++++++++++++++++++++++++++ prov/sharp/src/sharp_cq.c | 119 +++++ prov/sharp/src/sharp_domain.c | 219 +++++++++ prov/sharp/src/sharp_ep.c | 231 +++++++++ prov/sharp/src/sharp_eq.c | 102 ++++ prov/sharp/src/sharp_fabric.c | 109 +++++ prov/sharp/src/sharp_init.c | 95 ++++ prov/sharp/src/sharp_progress.c | 53 +++ src/fabric.c | 3 +- src/fi_tostr.c | 1 + util/info.c | 9 +- 23 files changed, 2313 insertions(+), 2 deletions(-) create mode 100644 include/ofi_sharp.h create mode 100644 man/fi_sharp.7.md create mode 100644 man/man7/fi_sharp.7 create mode 100644 prov/sharp/Makefile.include create mode 100644 prov/sharp/configure.m4 create mode 100644 prov/sharp/src/sharp.h create mode 100644 prov/sharp/src/sharp_attr.c create mode 100644 prov/sharp/src/sharp_coll.c create mode 100644 prov/sharp/src/sharp_cq.c create mode 100644 prov/sharp/src/sharp_domain.c create mode 100644 prov/sharp/src/sharp_ep.c create mode 100644 prov/sharp/src/sharp_eq.c create mode 100644 prov/sharp/src/sharp_fabric.c create mode 100644 prov/sharp/src/sharp_init.c create mode 100644 prov/sharp/src/sharp_progress.c diff --git a/.travis.yml b/.travis.yml index ed7308431ac..6d9e2204c5c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -104,6 +104,7 @@ install: --disable-udp --disable-usnic --disable-verbs + --disable-sharp - make -j2 $MAKE_FLAGS - make install - make test diff --git a/Makefile.am b/Makefile.am index deb7ce6f013..c73068d57da 100644 --- a/Makefile.am +++ b/Makefile.am @@ -3,6 +3,7 @@ # Copyright (c) 2017-2018 Intel Corporation, Inc. All right reserved. # Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All rights reserved. # (C) Copyright 2020 Hewlett Packard Enterprise Development LP +# Copyright (c) 2022 Intel Corporation. All right reserved. 
# # Makefile.am for libfabric @@ -449,6 +450,7 @@ include prov/rxd/Makefile.include include prov/bgq/Makefile.include include prov/opx/Makefile.include include prov/shm/Makefile.include +include prov/sharp/Makefile.include include prov/tcp/Makefile.include include prov/net/Makefile.include include prov/rstream/Makefile.include diff --git a/README.md b/README.md index a1e6ea1681d..a32fae56c92 100644 --- a/README.md +++ b/README.md @@ -211,6 +211,15 @@ over MSG endpoints of a core provider. See [`fi_rxm`(7)](https://ofiwg.github.io/libfabric/main/man/fi_rxm.7.html) for more information. +### sharp + +*** + +The `off_sharp` provider is an utility provider that supports collective endpoints utilizing +SHARP protocol for barier and allreduce operations. + +See [`fi_sharp`(7)](https://ofiwg.github.io/libfabric/main/man/fi_sharp.7.html) for more information. + ### sockets *** diff --git a/configure.ac b/configure.ac index 45ea1621af3..5a2b8c4ee30 100644 --- a/configure.ac +++ b/configure.ac @@ -925,6 +925,7 @@ FI_PROVIDER_SETUP([hook_debug]) FI_PROVIDER_SETUP([hook_hmem]) FI_PROVIDER_SETUP([dmabuf_peer_mem]) FI_PROVIDER_SETUP([opx]) +FI_PROVIDER_SETUP([sharp]) FI_PROVIDER_FINI dnl Configure the .pc file FI_PROVIDER_SETUP_PC diff --git a/include/ofi_prov.h b/include/ofi_prov.h index 87f9c935ce9..3668993f5da 100644 --- a/include/ofi_prov.h +++ b/include/ofi_prov.h @@ -323,4 +323,15 @@ OPX_INI ; #define COLL_INIT fi_coll_ini() COLL_INI ; +#if (HAVE_SHARP) && (HAVE_SHARP_DL) +# define SHARP_INI FI_EXT_INI +# define SHARP_INIT NULL +#elif (HAVE_SHARP) +# define SHARP_INI INI_SIG(fi_sharp_ini) +# define SHARP_INIT fi_sharp_ini() +SHARP_INI ; +#else +# define SHARP_INIT NULL +#endif + #endif /* _OFI_PROV_H_ */ diff --git a/include/ofi_sharp.h b/include/ofi_sharp.h new file mode 100644 index 00000000000..793204a43c0 --- /dev/null +++ b/include/ofi_sharp.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2022 Intel Corporation. All rights reserved. 
+ * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef _OFI_SHARP_H_ +#define _OFI_SHARP_H_ + +#include "config.h" + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include + +#include "ofi_coll.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define SHARP_VERSION 1 + + +#if ENABLE_DEBUG +#define SHARP_FLAG_DEBUG (1 << 1) +#else +#define SHARP_FLAG_DEBUG (0 << 1) +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* _OFI_SHM_H_ */ diff --git a/man/fi_sharp.7.md b/man/fi_sharp.7.md new file mode 100644 index 00000000000..c7df95944f2 --- /dev/null +++ b/man/fi_sharp.7.md @@ -0,0 +1,65 @@ +--- +layout: page +title: fi_sharp(7) +tagline: Libfabric Programmer's Manual +--- +{% include JB/setup %} + +# NAME + +fi_sharp \- The SHARP Fabric Provider + +# OVERVIEW + +The SHARP provider is a collectives offload provider that can be used on Linux +systems supporting SHARP protocol. + +# SUPPORTED FEATURES + +This release contains an initial implementation of the SHM provider that +offers the following support: + +*Endpoint types* +: The provider supports only endpoint type *FI_EP_COLLECTIVE*. + +*Endpoint capabilities* +: Endpoints cna support only fi_barrier and fi_allreduce operations. + +*Modes* +: The provider does not require the use of any mode bits. + +*Progress* +: The SHARP provider supports *FI_PROGRESS_MANUAL*. + +*Address Format* +: TBD + +*Msg flags* + The provider does not support messaging. + +*MR registration mode* + The provider implements FI_MR_VIRT_ADDR memory mode. + +*Atomic operations* + The provider does not support any atomic operation. + +# LIMITATIONS + +The SHARP provider has hard-coded maximums for supported queue sizes and data +transfers. These values are reflected in the related fabric attribute +structures + +No support for counters. 
+ +# RUNTIME PARAMETERS + +The *SHARP* provider checks for the following environment variables: + +*FI_SHARP_PARAM1* +: TBD Default: 720401 + +# SEE ALSO + +[`fabric`(7)](fabric.7.html), +[`fi_provider`(7)](fi_provider.7.html), +[`fi_getinfo`(3)](fi_getinfo.3.html) diff --git a/man/man7/fi_sharp.7 b/man/man7/fi_sharp.7 new file mode 100644 index 00000000000..129b8f37909 --- /dev/null +++ b/man/man7/fi_sharp.7 @@ -0,0 +1,69 @@ +.\" Automatically generated by Pandoc 2.0.6 +.\" +.TH "fi_sharp" "7" "2022\-11\-10" "Libfabric Programmer's Manual" "#VERSION#" +.hy +.SH NAME +.PP +fi_sharp \- The SHARP Fabric Provider +.SH OVERVIEW +.PP +The SHARP provider is a collectives offload provider that can be used on +Linux systems supporting SHARP protocol. +.SH SUPPORTED FEATURES +.PP +This release contains an initial implementation of the SHM provider that +offers the following support: +.TP +.B \f[I]Endpoint types\f[] +The provider supports only endpoint type \f[I]FI_EP_COLLECTIVE\f[]. +.RS +.RE +.TP +.B \f[I]Endpoint capabilities\f[] +Endpoints cna support only fi_barrier and fi_allreduce operations. +.RS +.RE +.TP +.B \f[I]Modes\f[] +The provider does not require the use of any mode bits. +.RS +.RE +.TP +.B \f[I]Progress\f[] +The SHARP provider supports \f[I]FI_PROGRESS_MANUAL\f[]. +.RS +.RE +.TP +.B \f[I]Address Format\f[] +TBD +.RS +.RE +.PP +\f[I]Msg flags\f[] The provider does not support messaging. +.PP +\f[I]MR registration mode\f[] The provider implements FI_MR_VIRT_ADDR +memory mode. +.PP +\f[I]Atomic operations\f[] The provider does not support any atomic +operation. +.SH LIMITATIONS +.PP +The SHARP provider has hard\-coded maximums for supported queue sizes +and data transfers. +These values are reflected in the related fabric attribute structures +.PP +No support for counters. 
+.SH RUNTIME PARAMETERS +.PP +The \f[I]SHARP\f[] provider checks for the following environment +variables: +.TP +.B \f[I]FI_SHARP_PARAM1\f[] +TBD Default: 720401 +.RS +.RE +.SH SEE ALSO +.PP +\f[C]fabric\f[](7), \f[C]fi_provider\f[](7), \f[C]fi_getinfo\f[](3) +.SH AUTHORS +OpenFabrics. diff --git a/prov/sharp/Makefile.include b/prov/sharp/Makefile.include new file mode 100644 index 00000000000..f5f7028ec86 --- /dev/null +++ b/prov/sharp/Makefile.include @@ -0,0 +1,30 @@ +if HAVE_SHARP +_sharp_files = \ + include/ofi_sharp.h \ + prov/sharp/src/sharp.h \ + prov/sharp/src/sharp_attr.c \ + prov/sharp/src/sharp_init.c \ + prov/sharp/src/sharp_fabric.c \ + prov/sharp/src/sharp_domain.c \ + prov/sharp/src/sharp_eq.c \ + prov/sharp/src/sharp_ep.c \ + prov/sharp/src/sharp_cq.c \ + prov/sharp/src/sharp_coll.c \ + prov/sharp/src/sharp_progress.c + +if HAVE_SHARP_DL +pkglib_LTLIBRARIES += libsharp-fi.la +libsharp_fi_la_SOURCES = $(_sharp_files) $(common_srcs) +libsharp_fi_la_LIBADD = $(linkback) $(sharp_LIBS) +libsharp_fi_la_LDFLAGS = -module -avoid-version -shared -export-dynamic +libsharp_fi_la_DEPENDENCIES = $(linkback) +else !HAVE_SHARP_DL +src_libfabric_la_SOURCES += $(_sharp_files) +src_libfabric_la_LIBADD += $(sharp_LIBS) +endif !HAVE_SHARP_DL + +prov_install_man_pages += man/man7/fi_sharp.7 + +endif HAVE_SHARP + +prov_dist_man_pages += man/man7/fi_sharp.7 diff --git a/prov/sharp/configure.m4 b/prov/sharp/configure.m4 new file mode 100644 index 00000000000..539ba929d9d --- /dev/null +++ b/prov/sharp/configure.m4 @@ -0,0 +1,15 @@ +dnl Configury specific to the libfabric sharp provider + +dnl Called to configure this provider +dnl +dnl Arguments: +dnl +dnl $1: action if configured successfully +dnl $2: action if not configured successfully +dnl +AC_DEFUN([FI_SHARP_CONFIGURE],[ + # Determine if we can support the sharp provider + sharp_happy=0 + AS_IF([test x"$enable_sharp" != x"no"], [sharp_happy=1]) + AS_IF([test $sharp_happy -eq 1], [$1], [$2]) +]) diff --git 
a/prov/sharp/src/sharp.h b/prov/sharp/src/sharp.h new file mode 100644 index 00000000000..c62b6c9baed --- /dev/null +++ b/prov/sharp/src/sharp.h @@ -0,0 +1,198 @@ +/* + * Copyright (c) 2022 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +//#include +//#include +//#include +#include +#include + +#include +#include +#include +//#include +#include +#include +//#include +#include +//#include +//#include +#include +//#include + +#ifndef _SHARP_H_ +#define _SHARP_H_ + +#define SHARP_IOV_LIMIT 1 +#define SHARP_TX_OP_FLAGS (0) +#define SHARP_RX_OP_FLAGS (0) +#define SHARP_DOMAIN_CAPS (FI_COLLECTIVE | FI_LOCAL_COMM | FI_REMOTE_COMM) +enum { + SHARP_RX_SIZE = 65536, + SHARP_TX_SIZE = 16384, +}; + + +struct sharp_env { + size_t ib_port; +}; + +extern struct sharp_env sharp_env; + +/// XXX temporary solution +#ifdef sharp_coll_context +#define sharp_coll_context_t struct sharp_coll_context +#else +#define sharp_coll_context_t void +#endif + +struct sharp_domain { + struct util_domain util_domain; + struct fid_domain *peer_domain; + sharp_coll_context_t *sharp_context; + ofi_atomic32_t ref; // mr count + ofi_spin_t lock; +}; + +struct sharp_fabric { + struct util_fabric util_fabric; +}; + +struct sharp_eq { + struct util_eq util_eq; + struct fid_eq *peer_eq; +}; + +struct sharp_ep { + struct util_ep util_ep; + struct fi_info *sharp_info; + + /* + * Peer ep from the main provider. + * Used for oob communications that SHARP uses during setup. 
+ */ + struct fid_ep *peer_ep; + struct fi_info *peer_info; + + ofi_atomic32_t ref; // mc count + ofi_spin_t lock; +}; + +// XXX to be reused from coll provider +struct sharp_av { + struct util_av util_av; + struct fid_peer_av *peer_av; +}; + +/// XXX temporary solution +#ifdef sharp_coll_comm +#define sharp_coll_comm_t struct sharp_coll_comm +#else +#define sharp_coll_comm_t void +#endif +struct sharp_mc { + struct fid_mc mc_fid; + struct util_av_set *av_set; + uint64_t local_rank; + uint16_t group_id; + uint16_t seq; + ofi_atomic32_t ref; + + struct fid_mc *oob_fid_mc; + struct sharp_ep *ep; + sharp_coll_comm_t *sharp_context; +}; + +struct sharp_cq { + struct util_cq util_cq; + struct fid_peer_cq *peer_cq; +}; + +extern struct fi_fabric_attr sharp_fabric_attr; +extern struct fi_provider sharp_prov; +extern struct util_prov sharp_util_prov; +extern struct fi_info sharp_info; + +int sharp_fabric(struct fi_fabric_attr *attr, struct fid_fabric **fabric, + void *context); + +int sharp_domain2(struct fid_fabric *fabric, struct fi_info *info, + struct fid_domain **dom, uint64_t flags, void *context); + +int sharp_query_collective(struct fid_domain *domain, + enum fi_collective_op coll, struct fi_collective_attr *attr, + uint64_t flags); + + +int sharp_endpoint(struct fid_domain *domain, struct fi_info *info, + struct fid_ep **ep, void *context); + +void sharp_ep_progress(struct util_ep *util_ep); + +int sharp_join_collective(struct fid_ep *ep, const void *addr, + uint64_t flags, struct fid_mc **mc, void *context); + +int sharp_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr, + struct fid_cq **cq_fid, void *context); + +int sharp_eq_open(struct fid_fabric *fabric, struct fi_eq_attr *attr, + struct fid_eq **eq_fid, void *context); + +void sharp_collective_comp(struct util_coll_operation *coll_op); + +ssize_t sharp_ep_barrier(struct fid_ep *ep, fi_addr_t coll_addr, void *context); + +ssize_t sharp_ep_barrier2(struct fid_ep *ep, fi_addr_t coll_addr, uint64_t 
flags, + void *context); + +ssize_t sharp_ep_allreduce(struct fid_ep *ep, const void *buf, size_t count, + void *desc, void *result, void *result_desc, + fi_addr_t coll_addr, enum fi_datatype datatype, + enum fi_op op, uint64_t flags, void *context); + +#endif diff --git a/prov/sharp/src/sharp_attr.c b/prov/sharp/src/sharp_attr.c new file mode 100644 index 00000000000..a379f894160 --- /dev/null +++ b/prov/sharp/src/sharp_attr.c @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2022 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "sharp.h" + +#define SHARP_TX_CAPS (FI_COLLECTIVE) +#define SHARP_RX_CAPS (FI_COLLECTIVE) + + +struct fi_tx_attr sharp_tx_attr = { + .caps = SHARP_TX_CAPS, + .op_flags = SHARP_TX_OP_FLAGS, + .comp_order = FI_ORDER_NONE, + .msg_order = ~0x0ULL, + .size = SHARP_TX_SIZE, + .iov_limit = SHARP_IOV_LIMIT, + .rma_iov_limit = SHARP_IOV_LIMIT +}; + +struct fi_rx_attr sharp_rx_attr = { + .caps = SHARP_RX_CAPS, + .op_flags = SHARP_RX_OP_FLAGS, + .comp_order = FI_ORDER_NONE, + .msg_order = ~0x0ULL, + .size = SHARP_RX_SIZE, + .iov_limit = SHARP_IOV_LIMIT +}; + +struct fi_ep_attr sharp_ep_attr = { + .type = FI_EP_RDM, + .protocol = FI_PROTO_COLL, + .protocol_version = 1, + .max_msg_size = SIZE_MAX, + .max_order_raw_size = SIZE_MAX, + .max_order_waw_size = SIZE_MAX, + .max_order_war_size = SIZE_MAX, + .mem_tag_format = FI_TAG_GENERIC >> 1, + .tx_ctx_cnt = 1, + .rx_ctx_cnt = 1 +}; + +struct fi_domain_attr sharp_domain_attr = { + .name = "sharp", + .caps = SHARP_DOMAIN_CAPS, + .threading = FI_THREAD_SAFE, + .control_progress = FI_PROGRESS_AUTO, + .data_progress = FI_PROGRESS_AUTO, + .resource_mgmt = FI_RM_ENABLED, + .av_type = FI_AV_UNSPEC, + .mr_mode = 0, + .mr_key_size = sizeof_field(struct fi_rma_iov, key), //XXX + .cq_data_size = sizeof_field(struct ofi_op_hdr, data), + .cq_cnt = 1024, + .ep_cnt = 1, + .tx_ctx_cnt = 1, + .rx_ctx_cnt = 1, + .max_ep_tx_ctx = 1, + .max_ep_rx_ctx = 1, + .mr_iov_limit = SHARP_IOV_LIMIT, +}; + +struct fi_fabric_attr sharp_fabric_attr = { + .name = "sharp", + .prov_version = OFI_VERSION_DEF_PROV +}; + +struct fi_info sharp_info = { + .caps = SHARP_TX_CAPS | SHARP_RX_CAPS | SHARP_DOMAIN_CAPS | FI_COLLECTIVE, + .addr_format = FI_ADDR_STR, + .tx_attr = &sharp_tx_attr, + .rx_attr = &sharp_rx_attr, + .ep_attr = &sharp_ep_attr, + .domain_attr = &sharp_domain_attr, + .fabric_attr = &sharp_fabric_attr +}; diff --git a/prov/sharp/src/sharp_coll.c b/prov/sharp/src/sharp_coll.c new file mode 100644 index 00000000000..f9e38a43c62 --- /dev/null 
+++ b/prov/sharp/src/sharp_coll.c @@ -0,0 +1,799 @@ +/* + * Copyright (c) 2022 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include + +#include "ofi_coll.h" +#include "../../coll/src/coll.h" // coll_ep, coll_eq + +#include "sharp.h" + +int sharp_query_collective(struct fid_domain *domain, + enum fi_collective_op coll, struct fi_collective_attr *attr, + uint64_t flags) +{ + if (!attr || attr->mode != 0) + return -FI_EINVAL; + + switch (coll) { + case FI_BARRIER: + return FI_SUCCESS; //XXX to be integrated w/ sharp_query + case FI_ALLREDUCE: + return FI_SUCCESS; //XXX to be integrated w/ sharp_query + case FI_ALLGATHER: + case FI_SCATTER: + case FI_BROADCAST: + case FI_ALLTOALL: + case FI_REDUCE_SCATTER: + case FI_REDUCE: + case FI_GATHER: + default: + return -FI_ENOSYS; + } + + return -FI_ENOSYS; +} + +static int sharp_mc_close(struct fid *fid) +{ + struct sharp_mc *mc; + + mc = container_of(fid, struct sharp_mc, mc_fid.fid); + if (mc->oob_fid_mc) { + fi_close(&(mc->oob_fid_mc->fid)); + mc->oob_fid_mc = NULL; + } + // ofi_atomic_dec32(&mc->ep->ref); //XXX + free(mc); + return 0; +} + +int sharp_mc_bind(struct fid *fid, struct fid *bfid, uint64_t flags) +{ + struct sharp_mc *mc; + struct fid_mc *fid_mc; + mc = container_of(fid, struct sharp_mc, mc_fid.fid); + fid_mc = container_of(bfid, struct fid_mc, fid); + mc->oob_fid_mc = fid_mc; + return 0; +} + +static struct fi_ops sharp_mc_fid_ops = { + .size = sizeof(struct fi_ops), + .close = sharp_mc_close, + .bind = sharp_mc_bind, + .control = fi_no_control, + .ops_open = fi_no_ops_open, +}; + + +int sharp_join_collective1(struct fid_ep *fid, const void *addr, uint64_t flags, + struct fid_mc **mc_fid, void *context) +{ + struct sharp_mc *mc; + //XXX struct fi_peer_transfer_context *peer_context = context; + + mc = calloc(1, sizeof(*mc)); + if (!mc) + return -FI_ENOMEM; + + *mc_fid = &mc->mc_fid; + (*mc_fid)->fid.ops = &sharp_mc_fid_ops; + return 0; +} + +static uint32_t sharp_get_next_id(struct sharp_mc *sharp_mc) +{ + uint32_t cid = sharp_mc->group_id; + return cid << 16 | sharp_mc->seq++; +} + +static struct util_coll_operation * 
+sharp_create_op(struct fid_ep *ep, struct sharp_mc *sharp_mc, + enum util_coll_op_type type, uint64_t flags, + void *context, util_coll_comp_fn_t comp_fn) +{ + struct util_coll_operation *coll_op; + + coll_op = calloc(1, sizeof(*coll_op)); + if (!coll_op) + return NULL; + + coll_op->ep = ep; + coll_op->cid = sharp_get_next_id(sharp_mc); + coll_op->mc = (struct util_coll_mc *)sharp_mc; //XXX + coll_op->type = type; + coll_op->flags = flags; + coll_op->context = context; + coll_op->comp_fn = comp_fn; + dlist_init(&coll_op->work_queue); + + return coll_op; +} + +static void sharp_log_work(struct util_coll_operation *coll_op) +{ +#if ENABLE_DEBUG + struct util_coll_work_item *cur_item = NULL; + struct util_coll_xfer_item *xfer_item; + struct dlist_entry *tmp = NULL; + size_t count = 0; + + FI_DBG(coll_op->mc->av_set->av->prov, FI_LOG_CQ, + "Remaining Work for %s:\n", + log_util_coll_op_type[coll_op->type]); + dlist_foreach_container_safe(&coll_op->work_queue, + struct util_coll_work_item, + cur_item, waiting_entry, tmp) + { + switch (cur_item->type) { + case UTIL_COLL_SEND: + xfer_item = container_of(cur_item, + struct util_coll_xfer_item, + hdr); + FI_DBG(coll_op->mc->av_set->av->prov, FI_LOG_CQ, + "\t%ld: { %p [%s] SEND TO: 0x%02x FROM: 0x%02lx " + "cnt: %d typesize: %ld tag: 0x%02lx }\n", + count, cur_item, + log_util_coll_state[cur_item->state], + xfer_item->remote_rank, coll_op->mc->local_rank, + xfer_item->count, + ofi_datatype_size(xfer_item->datatype), + xfer_item->tag); + break; + + case UTIL_COLL_RECV: + xfer_item = container_of(cur_item, + struct util_coll_xfer_item, + hdr); + FI_DBG(coll_op->mc->av_set->av->prov, FI_LOG_CQ, + "\t%ld: { %p [%s] RECV FROM: 0x%02x TO: 0x%02lx " + "cnt: %d typesize: %ld tag: 0x%02lx }\n", + count, cur_item, + log_util_coll_state[cur_item->state], + xfer_item->remote_rank, coll_op->mc->local_rank, + xfer_item->count, + ofi_datatype_size(xfer_item->datatype), + xfer_item->tag); + break; + + case UTIL_COLL_REDUCE: + 
FI_DBG(coll_op->mc->av_set->av->prov, FI_LOG_CQ, + "\t%ld: { %p [%s] REDUCTION }\n", + count, cur_item, + log_util_coll_state[cur_item->state]); + break; + + case UTIL_COLL_COPY: + FI_DBG(coll_op->mc->av_set->av->prov, FI_LOG_CQ, + "\t%ld: { %p [%s] COPY }\n", count, cur_item, + log_util_coll_state[cur_item->state]); + break; + + case UTIL_COLL_COMP: + FI_DBG(coll_op->mc->av_set->av->prov, FI_LOG_CQ, + "\t%ld: { %p [%s] COMPLETION }\n", count, cur_item, + log_util_coll_state[cur_item->state]); + break; + + default: + FI_DBG(coll_op->mc->av_set->av->prov, FI_LOG_CQ, + "\t%ld: { %p [%s] UNKNOWN }\n", count, cur_item, + log_util_coll_state[cur_item->state]); + break; + } + count++; + } +#endif +} + +static void sharp_progress_work(struct util_ep *util_ep, + struct util_coll_operation *coll_op) +{ + struct util_coll_work_item *next_ready = NULL; + struct util_coll_work_item *cur_item = NULL; + struct util_coll_work_item *prev_item = NULL; + struct dlist_entry *tmp = NULL; + int previous_is_head; + + /* clean up any completed items while searching for the next ready */ + dlist_foreach_container_safe(&coll_op->work_queue, + struct util_coll_work_item, + cur_item, waiting_entry, tmp) { + + previous_is_head = (cur_item->waiting_entry.prev == + &cur_item->coll_op->work_queue); + if (!previous_is_head) { + prev_item = container_of(cur_item->waiting_entry.prev, + struct util_coll_work_item, + waiting_entry); + } + + if (cur_item->state == UTIL_COLL_COMPLETE) { + /* + * If there is work before cur and cur is fencing, + * we can't complete. 
+ */ + if (cur_item->fence && !previous_is_head) + continue; + + FI_DBG(coll_op->mc->av_set->av->prov, FI_LOG_CQ, + "Removing Completed Work item: %p \n", cur_item); + dlist_remove(&cur_item->waiting_entry); + free(cur_item); + + /* if the work queue is empty, we're done */ + if (dlist_empty(&coll_op->work_queue)) { + free(coll_op); + return; + } + continue; + } + + /* we can't progress if prior work is fencing */ + if (!previous_is_head && prev_item && prev_item->fence) { + FI_DBG(coll_op->mc->av_set->av->prov, FI_LOG_CQ, + "%p fenced by: %p \n", cur_item, prev_item); + return; + } + + /* + * If the current item isn't waiting, it's not the next + * ready item. + */ + if (cur_item->state != UTIL_COLL_WAITING) { + FI_DBG(coll_op->mc->av_set->av->prov, FI_LOG_CQ, + "Work item not waiting: %p [%s]\n", cur_item, + log_util_coll_state[cur_item->state]); + continue; + } + + FI_DBG(coll_op->mc->av_set->av->prov, FI_LOG_CQ, + "Ready item: %p \n", cur_item); + next_ready = cur_item; + break; + } + + if (!next_ready) + return; + + sharp_log_work(coll_op); + + next_ready->state = UTIL_COLL_PROCESSING; + slist_insert_tail(&next_ready->ready_entry, &util_ep->coll_ready_queue); +} + +static void sharp_bind_work(struct util_coll_operation *coll_op, + struct util_coll_work_item *item) +{ + item->coll_op = coll_op; + dlist_insert_tail(&item->waiting_entry, &coll_op->work_queue); +} + +static int sharp_sched_comp(struct util_coll_operation *coll_op) +{ + struct util_coll_work_item *comp_item; + + comp_item = calloc(1, sizeof(*comp_item)); + if (!comp_item) + return -FI_ENOMEM; + + comp_item->type = UTIL_COLL_COMP; + comp_item->state = UTIL_COLL_WAITING; + comp_item->fence = 1; + + sharp_bind_work(coll_op, comp_item); + return FI_SUCCESS; +} + +static int sharp_find_local_rank(struct fid_ep *ep, + struct sharp_mc *sharp_mc) +{ + struct sharp_av *av = container_of(sharp_mc->av_set->av, struct sharp_av, + util_av.av_fid); + fi_addr_t my_addr; + int i; + + my_addr = 
av->peer_av->owner_ops->ep_addr(av->peer_av, ep); + + sharp_mc->local_rank = FI_ADDR_NOTAVAIL; + if (my_addr != FI_ADDR_NOTAVAIL) { + for (i = 0; i < sharp_mc->av_set->fi_addr_count; i++) + if (sharp_mc->av_set->fi_addr_array[i] == my_addr) { + sharp_mc->local_rank = i; + break; + } + } + + return FI_SUCCESS; +} + +void sharp_join_comp(struct util_coll_operation *coll_op) +{ + struct fi_eq_entry entry; + struct sharp_ep *ep; + struct sharp_eq *eq; + + ep = container_of(coll_op->ep, struct sharp_ep, util_ep.ep_fid); + eq = container_of(ep->util_ep.eq, struct sharp_eq, util_eq.eq_fid); + + coll_op->data.join.new_mc->seq = 0; + coll_op->data.join.new_mc->group_id = + (uint16_t) ofi_bitmask_get_lsbset(coll_op->data.join.data); + + /* mark the local mask bit */ + ofi_bitmask_unset(ep->util_ep.coll_cid_mask, + coll_op->data.join.new_mc->group_id); + + /* write to the eq */ + memset(&entry, 0, sizeof(entry)); + entry.fid = &coll_op->mc->mc_fid.fid; + entry.context = coll_op->context; + + if (fi_eq_write(eq->peer_eq, FI_JOIN_COMPLETE, &entry, + sizeof(struct fi_eq_entry), FI_COLLECTIVE) < 0) + FI_WARN(ep->util_ep.domain->fabric->prov, FI_LOG_DOMAIN, + "join collective - eq write failed\n"); + + ofi_bitmask_free(&coll_op->data.join.data); + ofi_bitmask_free(&coll_op->data.join.tmp); +} + +void sharp_collective_comp(struct util_coll_operation *coll_op) +{ + struct sharp_ep *ep; + struct sharp_cq *cq; + + ep = container_of(coll_op->ep, struct sharp_ep, util_ep.ep_fid); + cq = container_of(ep->util_ep.tx_cq, struct sharp_cq, util_cq); + + if (cq->peer_cq->owner_ops->write(cq->peer_cq, coll_op->context, + FI_COLLECTIVE, 0, 0, 0, 0, 0)) + FI_WARN(ep->util_ep.domain->fabric->prov, FI_LOG_DOMAIN, + "collective - cq write failed\n"); + + switch (coll_op->type) { + case UTIL_COLL_ALLREDUCE_OP: + free(coll_op->data.allreduce.data); + break; + + case UTIL_COLL_SCATTER_OP: + free(coll_op->data.scatter); + break; + + case UTIL_COLL_BROADCAST_OP: + free(coll_op->data.broadcast.chunk); + 
free(coll_op->data.broadcast.scatter); + break; + + case UTIL_COLL_JOIN_OP: + case UTIL_COLL_BARRIER_OP: + case UTIL_COLL_ALLGATHER_OP: + default: + /* nothing to clean up */ + break; + } +} + +static ssize_t sharp_process_reduce_item(struct util_coll_reduce_item *reduce_item) +{ + if (reduce_item->op < FI_MIN || reduce_item->op > FI_BXOR) + return -FI_ENOSYS; + + ofi_atomic_write_handler(reduce_item->op, reduce_item->datatype, + reduce_item->inout_buf, + reduce_item->in_buf, + reduce_item->count); + return FI_SUCCESS; +} + +static ssize_t sharp_process_xfer_item(struct util_coll_xfer_item *item) +{ + struct util_coll_operation *coll_op; + struct sharp_ep *ep; + struct fi_msg_tagged msg; + struct iovec iov; + ssize_t ret; + + coll_op = item->hdr.coll_op; + ep = container_of(coll_op->ep, struct sharp_ep, util_ep.ep_fid); + + msg.msg_iov = &iov; + msg.desc = NULL; + msg.iov_count = 1; + msg.ignore = 0; + msg.context = item; + msg.data = 0; + msg.tag = item->tag; + msg.addr = coll_op->mc->av_set->fi_addr_array[item->remote_rank]; + + iov.iov_base = item->buf; + iov.iov_len = (item->count * ofi_datatype_size(item->datatype)); + + if (item->hdr.type == UTIL_COLL_SEND) { + ret = fi_tsendmsg(ep->peer_ep, &msg, FI_PEER_TRANSFER); + if (!ret) + FI_DBG(coll_op->mc->av_set->av->prov, FI_LOG_CQ, + "%p SEND [0x%02lx] -> [0x%02x] cnt: %d sz: %ld\n", + item, coll_op->mc->local_rank, item->remote_rank, + item->count, + item->count * ofi_datatype_size(item->datatype)); + return ret; + } else if (item->hdr.type == UTIL_COLL_RECV) { + ret = fi_trecvmsg(ep->peer_ep, &msg, FI_PEER_TRANSFER); + if (!ret) + FI_DBG(coll_op->mc->av_set->av->prov, FI_LOG_CQ, + "%p RECV [0x%02lx] <- [0x%02x] cnt: %d sz: %ld\n", + item, coll_op->mc->local_rank, item->remote_rank, + item->count, + item->count * ofi_datatype_size(item->datatype)); + return ret; + } + + return -FI_ENOSYS; +} + +void sharp_ep_progress(struct util_ep *util_ep) +{ + struct util_coll_work_item *work_item; + struct 
util_coll_reduce_item *reduce_item; + struct util_coll_copy_item *copy_item; + struct util_coll_xfer_item *xfer_item; + struct util_coll_operation *coll_op; + ssize_t ret; + + while (!slist_empty(&util_ep->coll_ready_queue)) { + slist_remove_head_container(&util_ep->coll_ready_queue, + struct util_coll_work_item, + work_item, ready_entry); + coll_op = work_item->coll_op; + switch (work_item->type) { + case UTIL_COLL_SEND: + xfer_item = container_of(work_item, + struct util_coll_xfer_item, + hdr); + ret = sharp_process_xfer_item(xfer_item); + if (ret && ret == -FI_EAGAIN) { + slist_insert_tail(&work_item->ready_entry, + &util_ep->coll_ready_queue); + goto out; + } + break; + + case UTIL_COLL_RECV: + xfer_item = container_of(work_item, + struct util_coll_xfer_item, + hdr); + ret = sharp_process_xfer_item(xfer_item); + if (ret) + goto out; + break; + + case UTIL_COLL_REDUCE: + reduce_item = container_of(work_item, + struct util_coll_reduce_item, + hdr); + ret = sharp_process_reduce_item(reduce_item); + if (ret) + goto out; + + reduce_item->hdr.state = UTIL_COLL_COMPLETE; + break; + + case UTIL_COLL_COPY: + copy_item = container_of(work_item, + struct util_coll_copy_item, + hdr); + memcpy(copy_item->out_buf, copy_item->in_buf, + copy_item->count * + ofi_datatype_size(copy_item->datatype)); + + copy_item->hdr.state = UTIL_COLL_COMPLETE; + break; + + case UTIL_COLL_COMP: + if (work_item->coll_op->comp_fn) + work_item->coll_op->comp_fn(work_item->coll_op); + + work_item->state = UTIL_COLL_COMPLETE; + break; + + default: + goto out; + } + + sharp_progress_work(util_ep, coll_op); + } + +out: + return; +} + +ssize_t sharp_peer_xfer_complete(struct fid_ep *ep, + struct fi_cq_tagged_entry *cqe, + fi_addr_t src_addr) +{ + struct util_coll_operation *coll_op; + struct util_ep *util_ep; + struct util_coll_xfer_item *xfer_item; + + xfer_item = cqe->op_context; + xfer_item->hdr.state = UTIL_COLL_COMPLETE; + + coll_op = xfer_item->hdr.coll_op; + FI_DBG(coll_op->mc->av_set->av->prov, 
FI_LOG_CQ, + "\tXfer complete: { %p %s Remote: 0x%02x Local: " + "0x%02lx cnt: %d typesize: %ld }\n", xfer_item, + xfer_item->hdr.type == UTIL_COLL_SEND ? "SEND" : "RECV", + xfer_item->remote_rank, coll_op->mc->local_rank, + xfer_item->count, ofi_datatype_size(xfer_item->datatype)); + + util_ep = container_of(coll_op->ep, struct util_ep, ep_fid); + sharp_progress_work(util_ep, coll_op); + + return 0; +} + +ssize_t sharp_peer_xfer_error(struct fid_ep *ep, struct fi_cq_err_entry *cqerr) +{ + struct util_coll_operation *coll_op; + struct util_coll_xfer_item *xfer_item; + + xfer_item = cqerr->op_context; + xfer_item->hdr.state = UTIL_COLL_COMPLETE; + + coll_op = xfer_item->hdr.coll_op; + FI_DBG(coll_op->mc->av_set->av->prov, FI_LOG_CQ, + "\tXfer error: { %p %s Remote: 0x%02x Local: " + "0x%02lx cnt: %d typesize: %ld }\n", xfer_item, + xfer_item->hdr.type == UTIL_COLL_SEND ? "SEND" : "RECV", + xfer_item->remote_rank, coll_op->mc->local_rank, + xfer_item->count, ofi_datatype_size(xfer_item->datatype)); + + /* TODO: finish the work with error */ + + return 0; +} + +static struct sharp_mc *sharp_create_mc(struct util_av_set *av_set, + void *context) +{ + struct sharp_mc *sharp_mc; + + sharp_mc = calloc(1, sizeof(*sharp_mc)); + if (!sharp_mc) + return NULL; + + sharp_mc->mc_fid.fid.fclass = FI_CLASS_MC; + sharp_mc->mc_fid.fid.context = context; + sharp_mc->mc_fid.fid.ops = &sharp_mc_fid_ops; + sharp_mc->mc_fid.fi_addr = (uintptr_t) sharp_mc; + + ofi_atomic_inc32(&av_set->ref); + sharp_mc->av_set = av_set; + + return sharp_mc; +} + +int sharp_join_collective(struct fid_ep *ep, const void *addr, + uint64_t flags, struct fid_mc **mc, void *context) +{ + struct sharp_mc *new_sharp_mc; + struct util_av_set *av_set; + struct sharp_mc *sharp_mc; + struct util_coll_operation *join_op; + struct util_ep *util_ep; + struct sharp_ep *sharp_ep; + struct fi_collective_addr *c_addr; + fi_addr_t sharp_addr; + const struct fid_av_set *set; + struct fid_mc *util_mc; + int ret; + + if 
(!(flags & FI_COLLECTIVE)) + return -FI_ENOSYS; + + util_ep = container_of(ep, struct util_ep, ep_fid); + sharp_ep = container_of(ep, struct sharp_ep, util_ep.ep_fid); + + ret = fi_join(sharp_ep->peer_ep, addr, flags | FI_PEER, &util_mc, context); + if (ret) + return ret; + + c_addr = (struct fi_collective_addr *)addr; + sharp_addr = c_addr->coll_addr; + set = c_addr->set; + + av_set = container_of(set, struct util_av_set, av_set_fid); + + if (sharp_addr == FI_ADDR_NOTAVAIL) { + assert(av_set->av->av_set); + sharp_mc = (struct sharp_mc*) &av_set->av->av_set->coll_mc; //XXX + } else { + sharp_mc = (struct sharp_mc*) (struct util_coll_mc*) ((uintptr_t) sharp_addr); //XXX + } + + new_sharp_mc = sharp_create_mc(av_set, context); + if (!new_sharp_mc) + { + ret = -FI_ENOMEM; + goto err0; + } + + /* get the rank */ + sharp_find_local_rank(ep, new_sharp_mc); + sharp_find_local_rank(ep, sharp_mc); + + join_op = sharp_create_op(ep, sharp_mc, UTIL_COLL_JOIN_OP, flags, + context, sharp_join_comp); + if (!join_op) { + ret = -FI_ENOMEM; + goto err1; + } + + join_op->data.join.new_mc = (struct util_coll_mc *)new_sharp_mc; //XXX + + ret = ofi_bitmask_create(&join_op->data.join.data, OFI_MAX_GROUP_ID); + if (ret) + goto err2; + + ret = ofi_bitmask_create(&join_op->data.join.tmp, OFI_MAX_GROUP_ID); + if (ret) + goto err3; + + +#if 0 ///XXX + ret = sharp_do_allreduce(join_op, util_ep->coll_cid_mask->bytes, + join_op->data.join.data.bytes, + join_op->data.join.tmp.bytes, + (int) ofi_bitmask_bytesize(util_ep->coll_cid_mask), + FI_UINT8, FI_BAND); + if (ret) + goto err4; +#endif ///XXX + ret = sharp_sched_comp(join_op); + if (ret) + goto err4; + + sharp_progress_work(util_ep, join_op); + + *mc = &new_sharp_mc->mc_fid; + new_sharp_mc->oob_fid_mc = util_mc; + return FI_SUCCESS; + +err4: + ofi_bitmask_free(&join_op->data.join.tmp); +err3: + ofi_bitmask_free(&join_op->data.join.data); +err2: + free(join_op); +err1: + fi_close(&new_sharp_mc->mc_fid.fid); +err0: + fi_close(&util_mc->fid); + 
return ret; +} + +ssize_t sharp_ep_barrier2(struct fid_ep *ep, fi_addr_t coll_addr, uint64_t flags, + void *context) +{ + struct sharp_mc *sharp_mc; + struct util_coll_operation *barrier_op; + struct util_ep *util_ep; + int ret; + + sharp_mc = (struct sharp_mc*) (struct util_coll_mc*) ((uintptr_t) coll_addr); //XXX + + barrier_op = sharp_create_op(ep, sharp_mc, UTIL_COLL_BARRIER_OP, + flags, context, + sharp_collective_comp); + if (!barrier_op) + return -FI_ENOMEM; + +#if 0 + send = ~barrier_op->mc->local_rank; + ret = coll_do_allreduce(barrier_op, &send, + &barrier_op->data.barrier.data, + &barrier_op->data.barrier.tmp, 1, FI_UINT64, + FI_BAND); + if (ret) + goto err1; +#endif + ret = sharp_sched_comp(barrier_op); + if (ret) + goto err1; + + util_ep = container_of(ep, struct util_ep, ep_fid); + sharp_progress_work(util_ep, barrier_op); + + return FI_SUCCESS; +err1: + free(barrier_op); + return ret; + +} + +ssize_t sharp_ep_barrier(struct fid_ep *ep, fi_addr_t coll_addr, void *context) +{ + return sharp_ep_barrier2(ep, coll_addr, 0, context); +} + + +ssize_t sharp_ep_allreduce(struct fid_ep *ep, const void *buf, size_t count, + void *desc, void *result, void *result_desc, + fi_addr_t coll_addr, enum fi_datatype datatype, + enum fi_op op, uint64_t flags, void *context) +{ + struct sharp_mc *sharp_mc; + struct util_coll_operation *allreduce_op; + struct util_ep *util_ep; + int ret; + + sharp_mc = (struct sharp_mc *) ((uintptr_t) coll_addr); + allreduce_op = sharp_create_op(ep, sharp_mc, UTIL_COLL_ALLREDUCE_OP, + flags, context, + sharp_collective_comp); + if (!allreduce_op) + return -FI_ENOMEM; + + allreduce_op->data.allreduce.size = count * ofi_datatype_size(datatype); + allreduce_op->data.allreduce.data = calloc(count, + ofi_datatype_size(datatype)); + if (!allreduce_op->data.allreduce.data) { + ret = -FI_ENOMEM; + goto err1; + } + +#if 0 + ret = coll_do_allreduce(allreduce_op, buf, result, + allreduce_op->data.allreduce.data, count, + datatype, op); + if (ret) + 
goto err2; +#else + memcpy(result, buf, count * ofi_datatype_size(datatype)); +#endif + ret = sharp_sched_comp(allreduce_op); + if (ret) + goto err2; + + util_ep = container_of(ep, struct util_ep, ep_fid); + sharp_progress_work(util_ep, allreduce_op); + + return FI_SUCCESS; + +err2: + free(allreduce_op->data.allreduce.data); +err1: + free(allreduce_op); + return ret; +} diff --git a/prov/sharp/src/sharp_cq.c b/prov/sharp/src/sharp_cq.c new file mode 100644 index 00000000000..2eeebb51706 --- /dev/null +++ b/prov/sharp/src/sharp_cq.c @@ -0,0 +1,119 @@ +/* + * Copyright (c) 2022 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "sharp.h" +#include "ofi_coll.h" //for coll_cq_init + + +static int sharp_cq_close(struct fid *fid) +{ + struct sharp_cq *cq; + int ret; + + cq = container_of(fid, struct sharp_cq, util_cq.cq_fid.fid); + + ret = ofi_cq_cleanup(&cq->util_cq); + if (ret) + return ret; + + free(cq); + return 0; +} + +static struct fi_ops coll_cq_fi_ops = { + .size = sizeof(struct fi_ops), + .close = sharp_cq_close, + .bind = fi_no_bind, + .control = fi_no_control, + .ops_open = fi_no_ops_open, +}; + +static struct fi_ops_cq coll_cq_ops = { + .size = sizeof(struct fi_ops_cq), + .read = fi_no_cq_read, + .readfrom = fi_no_cq_readfrom, + .readerr = fi_no_cq_readerr, + .sread = fi_no_cq_sread, + .sreadfrom = fi_no_cq_sreadfrom, + .signal = fi_no_cq_signal, + .strerror = fi_no_cq_strerror, +}; + +int sharp_cq_init(struct fid_domain *domain, + struct fi_cq_attr *attr, struct fid_cq **cq_fid, + ofi_cq_progress_func progress, void *context) +{ + struct sharp_cq *cq; + struct fi_peer_cq_context *peer_context = context; + int ret; + + const struct sharp_domain *coll_domain; + const struct fi_provider* provider; + + coll_domain = container_of(domain, struct sharp_domain, util_domain.domain_fid.fid); + provider = coll_domain->util_domain.fabric->prov; + + if (!attr || !(attr->flags & FI_PEER)) { + FI_WARN(provider, FI_LOG_CORE, "FI_PEER flag required\n"); + return -EINVAL; + } + + if (!peer_context || peer_context->size < sizeof(*peer_context)) { + FI_WARN(provider, FI_LOG_CORE, "invalid peer CQ context\n"); + return -EINVAL; + } + + cq = calloc(1, sizeof(*cq)); + if (!cq) + return -FI_ENOMEM; + + cq->peer_cq = peer_context->cq; + + ret = ofi_cq_init(provider, domain, attr, &cq->util_cq, progress, context); + if (ret) + goto err; + + *cq_fid = &cq->util_cq.cq_fid; + (*cq_fid)->fid.ops = &coll_cq_fi_ops; + (*cq_fid)->ops = &coll_cq_ops; + return 0; + +err: + free(cq); + return ret; +} + +int sharp_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr, + struct fid_cq **cq_fid, 
void *context) +{ + return sharp_cq_init(domain, attr, cq_fid, &ofi_cq_progress, context); +} diff --git a/prov/sharp/src/sharp_domain.c b/prov/sharp/src/sharp_domain.c new file mode 100644 index 00000000000..76a66779fa5 --- /dev/null +++ b/prov/sharp/src/sharp_domain.c @@ -0,0 +1,219 @@ +/* + * Copyright (c) 2022 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include +#include + +#include "sharp.h" + +#include "../../coll/src/coll.h" //for coll_av_open + +struct sharp_mr { + struct fid_mr mr_fid; + void *mr_handle; //obtained from sharp_coll_reg_mr + // alternatively mr_fid.mem_desc stores result of sharp_coll_reg_mr + struct sharp_domain *domain; +}; + +static int sharp_mr_close(fid_t fid) +{ +#if 0 + struct fid_mr mr_fid; + struct sharp_mr *sharp_mr = container_of(fid, struct sharp_mr, mr_fid.fid); +#endif +// XXX + return 0; +} +static struct fi_ops sharp_mr_fi_ops = { + .size = sizeof(struct fi_ops), + .close = sharp_mr_close, + .bind = fi_no_bind, + .control = fi_no_control, + .ops_open = fi_no_ops_open, +}; + + +static int sharp_mr_reg(struct fid *fid, const void *buf, size_t len, + uint64_t access, uint64_t offset, uint64_t requested_key, + uint64_t flags, struct fid_mr **mr, void *context) +{ + struct sharp_mr *sharp_mr; +#if 0 + struct sharp_domain *sharp_domain = container_of(fid, + struct sharp_domain, util_domain.domain_fid.fid); +#endif + + sharp_mr = calloc(1, sizeof(*sharp_mr)); + if (!sharp_mr) + return -FI_ENOMEM; + + void *sharp_coll_mr = NULL; + // mapped to sharp_coll_reg_mr + // Only one outstanding registration supported. No registration cache. 
+ + sharp_mr->mr_fid.fid.fclass = FI_CLASS_MR; + sharp_mr->mr_fid.fid.context = context; + sharp_mr->mr_fid.fid.ops = &sharp_mr_fi_ops; + sharp_mr->mr_fid.mem_desc = sharp_coll_mr; + sharp_mr->mr_fid.key = FI_KEY_NOTAVAIL; + *mr = &sharp_mr->mr_fid; + + // XXX do we need to track mrs inside domain + return 0; +} + +static struct fi_ops_mr sharp_domain_mr_ops = { + .size = sizeof(struct fi_ops_mr), + .reg = sharp_mr_reg, + .regv = fi_no_mr_regv, + .regattr = fi_no_mr_regattr, +}; + +static struct fi_ops_domain sharp_domain_ops = { + .size = sizeof(struct fi_ops_domain), + .av_open = coll_av_open, + .cq_open = sharp_cq_open, + .endpoint = sharp_endpoint, + .scalable_ep = fi_no_scalable_ep, + .cntr_open = fi_no_cntr_open, + .poll_open = fi_poll_create, + .stx_ctx = fi_no_stx_context, + .srx_ctx = fi_no_srx_context, + .query_atomic = fi_no_query_atomic, + .query_collective = sharp_query_collective, + .endpoint2 = fi_no_endpoint2 +}; + +static int sharp_domain_close(fid_t fid) +{ + int ret; + struct sharp_domain *domain; + + domain = container_of(fid, struct sharp_domain, util_domain.domain_fid.fid); + /// mapped to int sharp_coll_finalize(struct sharp_coll_context *context); + ret = ofi_domain_close(&domain->util_domain); + if (ret) + return ret; + + free(domain); + return 0; +} + +static struct fi_ops sharp_domain_fi_ops = { + .size = sizeof(struct fi_ops), + .close = sharp_domain_close, + .bind = fi_no_bind, + .control = fi_no_control, + .ops_open = fi_no_ops_open, +}; + +static inline void +fid_domain_init(struct fid_domain **domain_fid, + struct util_domain *util_domain, struct fi_ops *fid_ops, + struct fi_ops_domain *ops, struct fi_ops_mr *mr) +{ + *domain_fid = &util_domain->domain_fid; + (*domain_fid)->fid.ops = fid_ops; + (*domain_fid)->ops = ops; + (*domain_fid)->mr = mr; +} + +int sharp_domain2(struct fid_fabric *fabric, struct fi_info *info, + struct fid_domain **domain_fid, uint64_t flags, void *context) +{ + int ret; + struct sharp_domain *domain; + struct 
fi_peer_domain_context *peer_context = context; + + if (!(flags & FI_PEER)) { + FI_WARN(&sharp_prov, FI_LOG_CORE, + "FI_PEER flag required\n"); + return -EINVAL; + } + + if (!peer_context || peer_context->size < sizeof(*peer_context)) { + FI_WARN(&sharp_prov, FI_LOG_CORE, + "Invalid peer domain context\n"); + return -EINVAL; + } + + ret = ofi_prov_check_info(&sharp_util_prov, fabric->api_version, info); + if (ret) + return ret; + + domain = calloc(1, sizeof(*domain)); + if (!domain) + return -FI_ENOMEM; + + ret = ofi_domain_init(fabric, info, &domain->util_domain, context, + OFI_LOCK_MUTEX); + + + if (ret) { + free(domain); + return ret; + } + + ofi_atomic_initialize32(&domain->ref, 0); + domain->util_domain.threading = FI_THREAD_UNSPEC; + +#if 0 + // XXX + *domain_fid = &domain->util_domain.domain_fid; + (*domain_fid)->fid.ops = &sharp_domain_fi_ops; + (*domain_fid)->ops = &sharp_domain_ops; + (*domain_fid)->mr = &sharp_domain_mr_ops; +#endif + fid_domain_init(domain_fid, &domain->util_domain, &sharp_domain_fi_ops, + &sharp_domain_ops, &sharp_domain_mr_ops); + + +// XXX mapped to +// int sharp_coll_init(struct sharp_coll_init_spec *sharp_coll_spec, +// struct sharp_coll_context **sharp_coll_context); +#if 0 +struct sharp_coll_init_spec { + uint64_t job_id; /**< Job unique ID */ + int world_rank; /**< Global unique process id. */ + int world_size; /**< Num of processes in the job. */ + int (*progress_func)(void); /**< External progress function. */ + int group_channel_idx; /**< local group channel index(0 .. (max - 1))*/ + struct sharp_coll_config config; /**< @ref sharp_coll_config "SHARP COLL Configuration". */ + struct sharp_coll_out_of_band_colls oob_colls; /**< @ref sharp_coll_out_of_band_colls "List of OOB collectives". */ + int world_local_rank; /**< relative rank of this process on this node within its job. */ + int enable_thread_support; /**< enable multi threaded support. 
*/ + void *oob_ctx; /**< context for OOB functions in sharp_coll_init */ + int reserved[4]; /**< Reserved */ +}; +#endif + return 0; +} diff --git a/prov/sharp/src/sharp_ep.c b/prov/sharp/src/sharp_ep.c new file mode 100644 index 00000000000..2669310ac71 --- /dev/null +++ b/prov/sharp/src/sharp_ep.c @@ -0,0 +1,231 @@ +/* + * Copyright (c) 2022 Intel Corporation. All rights reserved + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+
+#include 
+#include 
+#include 
+#include 
+
+#include "ofi_iov.h"
+#include "ofi_mr.h"
+
+#include "sharp.h"
+
+static int sharp_getname(fid_t fid, void *addr, size_t *addrlen)
+{
+	int ret;
+	struct sharp_ep *ep;
+	char *name = addr;
+
+	ep = container_of(fid, struct sharp_ep, util_ep.ep_fid.fid);
+
+	if (!name || *addrlen == 0 ||
+	    snprintf(name, *addrlen, "sharp ") >= *addrlen)
+		return -FI_ETOOSMALL;
+
+	*addrlen -= 6;
+	name +=6;
+	ret = fi_getname(&ep->peer_ep->fid, name, addrlen);
+
+	if (!ret)
+		*addrlen += 6; /* total length includes the "sharp " prefix */
+
+	return ret;
+}
+
+static struct fi_ops_cm sharp_ep_cm_ops = {
+	.size = sizeof(struct fi_ops_cm),
+	.setname = fi_no_setname,
+	.getname = sharp_getname,
+	.getpeer = fi_no_getpeer,
+	.connect = fi_no_connect, //XXX
+	.listen = fi_no_listen,
+	.accept = fi_no_accept,
+	.reject = fi_no_reject,
+	.shutdown = fi_no_shutdown,
+	.join = sharp_join_collective,
+};
+
+static int sharp_ep_close(struct fid *fid)
+{
+	struct sharp_ep *ep;
+
+	ep = container_of(fid, struct sharp_ep, util_ep.ep_fid.fid);
+
+	ofi_endpoint_close(&ep->util_ep);
+	ofi_spin_destroy(&ep->lock);
+
+	free(ep);
+	return 0;
+}
+
+static int sharp_ep_bind(struct fid *ep_fid, struct fid *bfid, uint64_t flags)
+{
+	switch (bfid->fclass) {
+	case FI_CLASS_AV:
+	case FI_CLASS_CQ:
+		return ofi_ep_fid_bind(ep_fid, bfid, flags);
+	case FI_CLASS_EQ:
+	case FI_CLASS_CNTR:
+	case FI_CLASS_SRX_CTX:
+	default:
+		FI_WARN(&sharp_prov, FI_LOG_EP_CTRL,
+			"invalid fid class\n");
+		return -FI_EINVAL;
+	}
+	return -FI_EINVAL;
+}
+
+static int sharp_ep_ctrl(struct fid *fid, int command, void *arg)
+{
+	if (command == FI_ENABLE)
+		return 0;
+
+	return -FI_ENOSYS;
+}
+
+static struct fi_ops sharp_ep_fid_ops = {
+	.size = sizeof(struct fi_ops),
+	.close = sharp_ep_close,
+	.bind = sharp_ep_bind,
+	.control = sharp_ep_ctrl,
+	.ops_open = fi_no_ops_open,
+};
+
+static struct fi_ops_collective sharp_ep_collective_ops = {
+	.size = sizeof(struct fi_ops_collective),
+	.barrier = sharp_ep_barrier,
+	.broadcast 
= fi_coll_no_broadcast, + .alltoall = fi_coll_no_alltoall, + .allreduce = sharp_ep_allreduce, + .allgather = fi_coll_no_allgather, + .reduce_scatter = fi_coll_no_reduce_scatter, + .reduce = fi_coll_no_reduce, + .scatter = fi_coll_no_scatter, + .gather = fi_coll_no_gather, + .msg = fi_coll_no_msg, + .barrier2 = sharp_ep_barrier2, +}; +static struct fi_ops_ep sharp_ep_ops = { + .size = sizeof(struct fi_ops_ep), + .cancel = fi_no_cancel, //XXX + .getopt = fi_no_getopt, + .setopt = fi_no_setopt, + .tx_ctx = fi_no_tx_ctx, + .rx_ctx = fi_no_rx_ctx, + .rx_size_left = fi_no_rx_size_left, + .tx_size_left = fi_no_tx_size_left, +}; + +inline static void +fid_ep_init(struct fid_ep **ep_fid, + struct util_ep *util_ep, struct fi_ops *fid_ops, + struct fi_ops_ep *ops, struct fi_ops_cm *cm, + struct fi_ops_msg *msg, struct fi_ops_rma *rma, + struct fi_ops_tagged *tagged, struct fi_ops_atomic *atomic, + struct fi_ops_collective *collective) +{ + *ep_fid = &util_ep->ep_fid; + (*ep_fid)->fid.ops = fid_ops; + (*ep_fid)->ops = ops; + (*ep_fid)->cm = cm; + (*ep_fid)->msg = msg; + (*ep_fid)->rma = rma; + (*ep_fid)->tagged = tagged; + (*ep_fid)->atomic = atomic; + (*ep_fid)->collective = collective; +} + +int sharp_endpoint(struct fid_domain *domain, struct fi_info *info, + struct fid_ep **ep_fid, void *context) +{ + struct sharp_ep *ep; + struct fi_peer_transfer_context *peer_context = context; + int ret; + + if (!info || !(info->mode & FI_PEER_TRANSFER)) { + FI_WARN(&sharp_prov, FI_LOG_CORE, + "FI_PEER_TRANSFER mode required\n"); + return -EINVAL; + } + + if (!peer_context || peer_context->size < sizeof(*peer_context)) { + FI_WARN(&sharp_prov, FI_LOG_CORE, + "Invalid peer transfer context\n"); + return -EINVAL; + } + + ep = calloc(1, sizeof(*ep)); + if (!ep) + return -FI_ENOMEM; + + ep->sharp_info = fi_dupinfo(info); + if (!ep->sharp_info) { + ret = -FI_ENOMEM; + goto err; + } + + ep->peer_info = fi_dupinfo(peer_context->info); + if (!ep->peer_info) { + ret = -FI_ENOMEM; + goto err; + } 
+
+	ep->peer_ep = peer_context->ep;
+
+	ret = ofi_endpoint_init(domain, &sharp_util_prov, info, &ep->util_ep, context,
+		sharp_ep_progress);
+
+	if (ret)
+		goto err;
+
+	ofi_atomic_initialize32(&ep->ref, 0);
+
+	ret = ofi_spin_init(&ep->lock);
+	if (ret) {
+		/* lock was never initialized on failure; nothing to destroy */
+		goto err;
+	}
+
+	fid_ep_init(ep_fid, &ep->util_ep, &sharp_ep_fid_ops, &sharp_ep_ops,
+		&sharp_ep_cm_ops, NULL, NULL, NULL, NULL, &sharp_ep_collective_ops);
+
+	return 0;
+
+err:
+	fi_freeinfo(ep->peer_info);
+	fi_freeinfo(ep->sharp_info);
+	free(ep);
+	return ret;
+}
+
+
diff --git a/prov/sharp/src/sharp_eq.c b/prov/sharp/src/sharp_eq.c
new file mode 100644
index 00000000000..08d3332a688
--- /dev/null
+++ b/prov/sharp/src/sharp_eq.c
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2022 Intel Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ *     - Redistributions of source code must retain the above
+ *       copyright notice, this list of conditions and the following
+ *       disclaimer.
+ *
+ *     - Redistributions in binary form must reproduce the above
+ *       copyright notice, this list of conditions and the following
+ *       disclaimer in the documentation and/or other materials
+ *       provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "sharp.h"
+
+static int sharp_eq_close(struct fid *fid)
+{
+	struct sharp_eq *eq;
+	int ret;
+
+	eq = container_of(fid, struct sharp_eq, util_eq.eq_fid.fid);
+
+	ret = ofi_eq_cleanup(fid);
+	if (ret)
+		return ret;
+
+	free(eq);
+	return 0;
+}
+
+static struct fi_ops sharp_eq_fi_ops = {
+	.size = sizeof(struct fi_ops),
+	.close = sharp_eq_close,
+	.bind = fi_no_bind,
+	.control = fi_no_control,
+	.ops_open = fi_no_ops_open,
+};
+
+static struct fi_ops_eq sharp_eq_ops = {
+	.size = sizeof(struct fi_ops_eq),
+	.read = fi_no_eq_read,
+	.readerr = fi_no_eq_readerr,
+	.write = fi_no_eq_write,
+	.sread = fi_no_eq_sread,
+	.strerror = fi_no_eq_strerror,
+};
+
+int sharp_eq_open(struct fid_fabric *fabric, struct fi_eq_attr *attr,
+		struct fid_eq **eq_fid, void *context)
+{
+	struct sharp_eq *eq;
+	struct fi_peer_eq_context *peer_context = context;
+	int ret;
+
+	if (!attr || !(attr->flags & FI_PEER)) {
+		FI_WARN(&sharp_prov, FI_LOG_CORE, "FI_PEER flag required\n");
+		return -EINVAL;
+	}
+
+	if (!peer_context || peer_context->size < sizeof(*peer_context)) {
+		FI_WARN(&sharp_prov, FI_LOG_CORE, "invalid peer EQ context\n");
+		return -EINVAL;
+	}
+
+	eq = calloc(1, sizeof(*eq));
+	if (!eq)
+		return -FI_ENOMEM;
+
+	eq->peer_eq = peer_context->eq;
+
+	ret = ofi_eq_init(fabric, attr, &eq->util_eq.eq_fid, context);
+	if (ret)
+		goto err;
+
+	*eq_fid = &eq->util_eq.eq_fid;
+	(*eq_fid)->fid.ops = &sharp_eq_fi_ops;
+	(*eq_fid)->ops = &sharp_eq_ops;
+	return 0;
+
+err:
+	free(eq);
+	return ret;
+}
diff --git a/prov/sharp/src/sharp_fabric.c b/prov/sharp/src/sharp_fabric.c
new file mode 100644
index 00000000000..99c4e6e3a5d
--- /dev/null
+++ b/prov/sharp/src/sharp_fabric.c
@@ -0,0 +1,109 @@
+/*
+ * 
Copyright (c) 2022 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include +#include + +#include "sharp.h" + +static struct fi_ops_fabric sharp_fabric_ops = { + .size = sizeof(struct fi_ops_fabric), + .domain = fi_no_domain, + .passive_ep = fi_no_passive_ep, + .eq_open = sharp_eq_open, + .wait_open = fi_no_wait_open, + .trywait = ofi_trywait, + .domain2 = sharp_domain2 +}; + +static int sharp_fabric_close(fid_t fid) +{ + int ret; + struct util_fabric *fabric; + + fabric = container_of(fid, struct util_fabric, fabric_fid.fid); + + ret = ofi_fabric_close(fabric); + if (ret) + return ret; + + free(fabric); + return 0; +} + +static struct fi_ops sharp_fabric_fi_ops = { + .size = sizeof(struct fi_ops), + .close = sharp_fabric_close, + .bind = fi_no_bind, + .control = fi_no_control, + .ops_open = fi_no_ops_open, +}; + +/// XXX to be added to fabric.h later +static inline void +fid_fabric_init(struct fid_fabric **fabric_fid, + struct util_fabric *util_fabric, struct fi_ops *fid_ops, + struct fi_ops_fabric *ops) +{ + *fabric_fid = &util_fabric->fabric_fid; + (*fabric_fid)->fid.ops = fid_ops; + (*fabric_fid)->ops = ops; +} + +int sharp_fabric(struct fi_fabric_attr *attr, struct fid_fabric **fabric_fid, + void *context) +{ + int ret; + struct sharp_fabric *fabric; + + fabric = calloc(1, sizeof(*fabric)); + if (!fabric) + return -FI_ENOMEM; + + ret = ofi_fabric_init(&sharp_prov, &sharp_fabric_attr, attr, + &fabric->util_fabric, context); + if (ret) + goto err; + +#if 0 + *fabric_fid = &fabric->util_fabric.fabric_fid; ///XXX to be removed later + (*fabric_fid)->fid.ops = &sharp_fabric_fi_ops; + (*fabric_fid)->ops = &sharp_fabric_ops; +#endif + fid_fabric_init(fabric_fid, &fabric->util_fabric, &sharp_fabric_fi_ops, + &sharp_fabric_ops); + return 0; + +err: + free(fabric); + return ret; +} diff --git a/prov/sharp/src/sharp_init.c b/prov/sharp/src/sharp_init.c new file mode 100644 index 00000000000..17d0fbce690 --- /dev/null +++ b/prov/sharp/src/sharp_init.c @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2022 Intel Corporation. 
All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+
+#include 
+
+#include 
+#include "sharp.h"
+
+struct sharp_env sharp_env = {
+	.ib_port = 1,
+};
+
+static void sharp_init_env(void)
+{
+	fi_param_get_size_t(&sharp_prov, "ib_port", &sharp_env.ib_port);
+}
+
+static int sharp_getinfo(uint32_t version, const char *node, const char *service,
+			uint64_t flags, const struct fi_info *hints,
+			struct fi_info **info)
+{
+	int ret;
+
+	ret = util_getinfo(&sharp_util_prov, version, node, service, flags,
+			   hints, info);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+static void sharp_fini(void)
+{
+#if HAVE_SHARP_DL
+	ofi_hmem_cleanup();
+#endif
+}
+
+struct fi_provider sharp_prov = {
+	.name = OFI_OFFLOAD_PREFIX "sharp",
+	.version = OFI_VERSION_DEF_PROV,
+	.fi_version = OFI_VERSION_LATEST,
+	.getinfo = sharp_getinfo,
+	.fabric = sharp_fabric,
+	.cleanup = sharp_fini
+};
+
+/// @brief XXX to be moved to sharp_attr.c
+struct util_prov sharp_util_prov = {
+	.prov = &sharp_prov,
+	.info = &sharp_info,
+	.flags = 0
+};
+
+SHARP_INI
+{
+#if HAVE_SHARP_DL
+	ofi_hmem_init();
+#endif
+	fi_param_define(&sharp_prov, "ib_port", FI_PARAM_SIZE_T,
+			"IB device port used by SHARP \
+			Default: 1");
+
+	sharp_init_env();
+	return &sharp_prov;
+}
diff --git a/prov/sharp/src/sharp_progress.c b/prov/sharp/src/sharp_progress.c
new file mode 100644
index 00000000000..46d849fbef9
--- /dev/null
+++ b/prov/sharp/src/sharp_progress.c
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2022 Intel Corporation. All rights reserved
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include + +#include "ofi_iov.h" +#include "ofi_hmem.h" +#include "ofi_atom.h" +#include "ofi_mr.h" +#include "sharp.h" +#if 0 +/// @brief to be moved to sharp_coll.c XXX +/// @param util_ep +void sharp_ep_progress(struct util_ep *util_ep) +{ +#if 0 + struct sharp_ep *ep; + + ep = container_of(util_ep, struct sharp_ep, util_ep); +#endif +} +#endif \ No newline at end of file diff --git a/src/fabric.c b/src/fabric.c index 046e4206d81..74881d8c6b6 100644 --- a/src/fabric.c +++ b/src/fabric.c @@ -425,7 +425,7 @@ static void ofi_ordered_provs_init(void) "ofi_hook_dmabuf_peer_mem", /* So do the offload providers. 
*/ - "off_coll", + "off_coll", "off_sharp", }; struct ofi_prov *prov; int num_provs, i; @@ -870,6 +870,7 @@ void fi_ini(void) ofi_register_provider(HOOK_NOOP_INIT, NULL); ofi_register_provider(COLL_INIT, NULL); + ofi_register_provider(SHARP_INIT, NULL); ofi_init = 1; diff --git a/src/fi_tostr.c b/src/fi_tostr.c index a4ef1cc9585..5e6ee84cb88 100644 --- a/src/fi_tostr.c +++ b/src/fi_tostr.c @@ -276,6 +276,7 @@ static void ofi_tostr_protocol(char *buf, size_t len, uint32_t protocol) CASEENUMSTRN(FI_PROTO_OPX, len); CASEENUMSTRN(FI_PROTO_CXI, len); CASEENUMSTRN(FI_PROTO_XNET, len); + CASEENUMSTRN(FI_PROTO_COLL, len); default: if (protocol & FI_PROV_SPECIFIC) ofi_strncatf(buf, len, "Provider specific"); diff --git a/util/info.c b/util/info.c index 9f375a77719..dcb010f567c 100644 --- a/util/info.c +++ b/util/info.c @@ -1,6 +1,7 @@ /* * Copyright (c) 2013-2020 Intel Corporation. All rights reserved. * Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2022 Intel Corporation. All rights reserved. 
 *
 * This software is available to you under the BSD license below:
 *
@@ -35,6 +36,7 @@
 #include 
 #include 
+#include 
 #include 
 #include 
@@ -68,6 +70,7 @@ static const struct option longopts[] = {
 	{"list", no_argument, NULL, 'l'},
 	{"verbose", no_argument, NULL, 'v'},
 	{"version", no_argument, &ver, 1},
+	{"offload", no_argument, NULL, 'o'},
 	{0,0,0,0}
 };
 
@@ -89,6 +92,7 @@ static const char *help_strings[][2] = {
 	{"", "\t\tlist available libfabric providers"},
 	{"", "\t\tverbose output"},
 	{"", "\t\tprint version info and exit"},
+	{"", "\t\tshow only offload providers (experimental)"},
 	{"", ""}
 };
 
@@ -355,7 +359,7 @@ int main(int argc, char **argv)
 	hints->domain_attr->mode = ~0;
 	hints->domain_attr->mr_mode = ~(FI_MR_BASIC | FI_MR_SCALABLE);
 
-	while ((op = getopt_long(argc, argv, "s:n:P:c:m:t:a:p:d:f:eg:i:lhv", longopts,
+	while ((op = getopt_long(argc, argv, "s:n:P:c:m:t:a:p:d:f:eg:i:lhvo", longopts,
 			&option_index)) != -1) {
 		switch (op) {
 		case 0:
@@ -436,6 +440,9 @@
 		case 'v':
 			verbose = 1;
 			break;
+		case 'o':
+			flags |= OFI_OFFLOAD_PROV_ONLY;
+			break;
 		case 'h':
 		default:
print_help:

From 6d2a514ac5bd257b9c03312abbd97f05eff6ef73 Mon Sep 17 00:00:00 2001
From: Lukasz Dorau 
Date: Mon, 5 Dec 2022 11:42:51 +0100
Subject: [PATCH 09/10] prov/sharp: Add mocks for SHARP

This patch requires the original SHARP header in
/usr/include/mellanox/sharp.h

Signed-off-by: Lukasz Dorau 
---
 prov/sharp/Makefile.include  |  1 +
 prov/sharp/src/sharp_mocks.c | 85 ++++++++++++++++++++++++++++++++++++
 2 files changed, 86 insertions(+)
 create mode 100644 prov/sharp/src/sharp_mocks.c

diff --git a/prov/sharp/Makefile.include b/prov/sharp/Makefile.include
index f5f7028ec86..859eae3a6e5 100644
--- a/prov/sharp/Makefile.include
+++ b/prov/sharp/Makefile.include
@@ -10,6 +10,7 @@ _sharp_files = \
 	prov/sharp/src/sharp_ep.c \
 	prov/sharp/src/sharp_cq.c \
 	prov/sharp/src/sharp_coll.c \
+	prov/sharp/src/sharp_mocks.c \
 	prov/sharp/src/sharp_progress.c
 
 if HAVE_SHARP_DL
diff --git 
a/prov/sharp/src/sharp_mocks.c b/prov/sharp/src/sharp_mocks.c new file mode 100644 index 00000000000..3fd9df9c649 --- /dev/null +++ b/prov/sharp/src/sharp_mocks.c @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2022 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include + +/* mock of struct sharp_coll_context */ +struct sharp_coll_context { + int mock_content; +}; + +/** + * @brief SHARP coll context initialization + * + * This routine is initialize SHARP coll library and create @ref sharp_coll_context "SHARP coll context". + * This is a collective, called from all processes of the job. 
+ * + * @warning An application cannot call any SHARP coll routine before sharp_coll_init + * + * @param [in] sharp_coll_spec SHARP coll specification descriptor. + * @param [out] sharp_coll_context Initialized @ref sharp_coll_context "SHARP coll context". + * + * @return Error code as defined by @ref sharp_error_no + */ +int sharp_coll_init(struct sharp_coll_init_spec *sharp_coll_spec, + struct sharp_coll_context **sharp_coll_context) +{ + struct sharp_coll_context *context; + context = calloc(1, sizeof(*context)); + if (!context) + return -FI_ENOMEM; + + *sharp_coll_context = context; + + return 0; +} + +/** + * @brief SHARP coll context finalize + * + * This routine finalizes and releases the resources associated with + * @ref sharp_coll_context "SHARP coll context". typically done once, just before the process ends. + * + * @warning An application cannot call any SHARP coll routine after sharp_coll_finalize + * + * @param [in] context SHARP coll context to cleanup. + * + * @return Error code as defined by @ref sharp_error_no + */ +int sharp_coll_finalize(struct sharp_coll_context *context) +{ + free(context); + + return 0; +} From 9a6521ac3b38248ee0cfc98590f011e22652872b Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Tue, 6 Dec 2022 14:44:30 +0100 Subject: [PATCH 10/10] prov/sharp: Use SHARP mocks in sharp_domain Signed-off-by: Lukasz Dorau --- prov/sharp/src/sharp_domain.c | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/prov/sharp/src/sharp_domain.c b/prov/sharp/src/sharp_domain.c index 76a66779fa5..4852f5f2730 100644 --- a/prov/sharp/src/sharp_domain.c +++ b/prov/sharp/src/sharp_domain.c @@ -32,6 +32,7 @@ #include #include +#include #include "sharp.h" @@ -115,16 +116,21 @@ static struct fi_ops_domain sharp_domain_ops = { static int sharp_domain_close(fid_t fid) { - int ret; + int ret = 0; struct sharp_domain *domain; domain = container_of(fid, struct sharp_domain, util_domain.domain_fid.fid); - /// 
mapped to int sharp_coll_finalize(struct sharp_coll_context *context); + + ret = sharp_coll_finalize(domain->sharp_context); + if (ret) + return ret; + ret = ofi_domain_close(&domain->util_domain); if (ret) return ret; free(domain); + return 0; } @@ -174,14 +180,9 @@ int sharp_domain2(struct fid_fabric *fabric, struct fi_info *info, if (!domain) return -FI_ENOMEM; - ret = ofi_domain_init(fabric, info, &domain->util_domain, context, - OFI_LOCK_MUTEX); - - - if (ret) { - free(domain); - return ret; - } + ret = ofi_domain_init(fabric, info, &domain->util_domain, context, OFI_LOCK_MUTEX); + if (ret) + goto err_free_domain; ofi_atomic_initialize32(&domain->ref, 0); domain->util_domain.threading = FI_THREAD_UNSPEC; @@ -196,10 +197,6 @@ int sharp_domain2(struct fid_fabric *fabric, struct fi_info *info, fid_domain_init(domain_fid, &domain->util_domain, &sharp_domain_fi_ops, &sharp_domain_ops, &sharp_domain_mr_ops); - -// XXX maped to -// int sharp_coll_init(struct sharp_coll_init_spec *sharp_coll_spec, -// struct sharp_coll_context **sharp_coll_context); #if 0 struct sharp_coll_init_spec { uint64_t job_id; /**< Job unique ID */ @@ -215,5 +212,15 @@ struct sharp_coll_init_spec { int reserved[4]; /**< Reserved */ }; #endif + + struct sharp_coll_init_spec sharp_coll_spec = {0}; + ret = sharp_coll_init(&sharp_coll_spec, (struct sharp_coll_context **)&domain->sharp_context); + if (ret) + goto err_free_domain; + return 0; + +err_free_domain: + free(domain); + return ret; }