-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
CUDA: enable cuda support v1 - EAGER with GDR COPY #20
base: master
Are you sure you want to change the base?
Changes from 1 commit
a1b6607
87d29ef
ff3bcd0
4ed2a7d
87ecdd7
9cbca08
4c1417b
369d416
2915cdb
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -81,6 +81,9 @@ typedef struct ucp_ep_config_key { | |
/* Lanes for atomic operations, sorted by priority, highest first */ | ||
ucp_lane_index_t amo_lanes[UCP_MAX_LANES]; | ||
|
||
/* Lanes for domain operations, sorted by priority, highest first */ | ||
ucp_lane_index_t domain_lanes[UCP_MAX_LANES]; | ||
|
||
/* Bitmap of remote mds which are reachable from this endpoint (with any set | ||
* of transports which could be selected in the future). | ||
*/ | ||
|
@@ -106,6 +109,17 @@ typedef struct ucp_ep_rma_config { | |
} ucp_ep_rma_config_t; | ||
|
||
|
||
/*
 * Evaluates to non-zero when the given address-domain handle is the dummy
 * (default) handle. The argument is parenthesized so that expressions
 * containing lower-precedence operators (e.g. a conditional expression)
 * are compared as a whole.
 */
#define UCP_IS_DEFAULT_ADDR_DOMAIN(_addr_dn_h) \
    ((_addr_dn_h) == &ucp_addr_dn_dummy_handle)
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Parenthesize the macro argument: (_addr_dn_h == &ucp_addr_dn_dummy_handle) -> ((_addr_dn_h) == &ucp_addr_dn_dummy_handle) |
||
|
||
/*
 * Per-address-domain endpoint configuration. Currently holds only the
 * thresholds for the tag eager protocol.
 */
typedef struct ucp_ep_addr_domain_config { | ||
struct { | ||
struct { | ||
/* NOTE(review): presumably the maximal message size sent with the short
 * protocol; signed, so a negative value may mean "disabled" - confirm. */
ssize_t max_short; | ||
/* One zero-copy threshold per entry, UCP_MAX_IOV entries in total.
 * NOTE(review): presumably indexed by iov count - confirm. */
size_t zcopy_thresh[UCP_MAX_IOV]; | ||
} eager; | ||
} tag; | ||
} ucp_ep_addr_domain_config_t; | ||
|
||
/* | ||
* Configuration for AM and tag offload protocols | ||
*/ | ||
|
@@ -136,6 +150,10 @@ typedef struct ucp_ep_config { | |
*/ | ||
ucp_lane_map_t p2p_lanes; | ||
|
||
/* Bitmap of which lanes are domain lanes | ||
*/ | ||
ucp_lane_map_t domain_lanes; | ||
|
||
/* Configuration for each lane that provides RMA */ | ||
ucp_ep_rma_config_t rma[UCP_MAX_LANES]; | ||
/* Threshold for switching from put_short to put_bcopy */ | ||
|
@@ -179,8 +197,11 @@ typedef struct ucp_ep_config { | |
* (currently it's only AM based). */ | ||
const ucp_proto_t *proto; | ||
} stream; | ||
} ucp_ep_config_t; | ||
|
||
/* Configuration of all domains */ | ||
ucp_ep_addr_domain_config_t domain[UCP_MAX_LANES]; | ||
|
||
} ucp_ep_config_t; | ||
|
||
/** | ||
* Remote protocol layer endpoint | ||
|
@@ -245,4 +266,8 @@ size_t ucp_ep_config_get_zcopy_auto_thresh(size_t iovcnt, | |
const ucp_context_h context, | ||
double bandwidth); | ||
|
||
/*
 * Look up a domain lane in the 'lanes' array of the given endpoint
 * configuration for the memory domains in 'dn_md_map'. The visible caller
 * compares the result against UCP_NULL_LANE to detect "no lane found".
 * NOTE(review): exact matching rule is defined in the implementation, which
 * is not shown here.
 */
ucp_lane_index_t ucp_config_find_domain_lane(const ucp_ep_config_t *config, | ||
const ucp_lane_index_t *lanes, | ||
ucp_md_map_t dn_md_map); | ||
/*
 * Set up the domain lanes of endpoint 'ep' for the address domain
 * 'addr_dn_h'; returns a ucs_status_t.
 * NOTE(review): side effects on the endpoint are not visible here - confirm
 * against the implementation.
 */
ucs_status_t ucp_ep_set_domain_lanes(ucp_ep_h ep, ucp_addr_dn_h addr_dn_h); | ||
#endif |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,6 +5,7 @@ | |
*/ | ||
|
||
#include "dt.h" | ||
#include <ucp/core/ucp_request.inl> | ||
|
||
|
||
size_t ucp_dt_pack(ucp_datatype_t datatype, void *dest, const void *src, | ||
|
@@ -44,3 +45,123 @@ size_t ucp_dt_pack(ucp_datatype_t datatype, void *dest, const void *src, | |
state->offset += result_len; | ||
return result_len; | ||
} | ||
|
||
/**
 * Unpack received contiguous data into a buffer that belongs to a
 * non-default address domain.
 *
 * The destination buffer is registered with the memory domain of a domain
 * lane whose interface supports put_zcopy, the received data is written to
 * it with uct_ep_put_zcopy(), and the registration is released again.
 *
 * @param req          Receive request; supplies the worker and the
 *                     address-domain handle (req->addr_dn_h).
 * @param buffer       Destination address to write to.
 * @param buffer_size  Size of the destination region that gets registered.
 * @param recv_data    Source of the received payload.
 * @param recv_length  Payload length in bytes; 0 is a no-op.
 *
 * @return UCS_OK on success, UCS_ERR_IO_ERROR if no suitable domain lane
 *         exists, otherwise the failing status of registration, put or
 *         deregistration.
 */
static UCS_F_ALWAYS_INLINE ucs_status_t
ucp_dn_dt_unpack(ucp_request_t *req, void *buffer, size_t buffer_size,
                 const void *recv_data, size_t recv_length)
{
    ucp_worker_h worker     = req->recv.worker;
    ucp_context_h context   = worker->context;
    ucp_ep_h ep             = ucp_worker_ep_find(worker, worker->uuid);
    ucp_ep_config_t *config = ucp_ep_config(ep);
    ucp_md_map_t dn_md_map  = req->addr_dn_h->md_map;
    ucp_lane_index_t dn_lane;
    ucp_rsc_index_t rsc_index;
    uct_iface_attr_t *iface_attr;
    unsigned md_index;
    ucs_status_t status;
    ucs_status_t dereg_status;
    uct_mem_h memh;
    uct_iov_t iov;

    if (recv_length == 0) {
        return UCS_OK;
    }

    /* Validate sizes before any resource is acquired */
    ucs_assert(buffer_size >= recv_length);

    /* Search for a domain lane whose interface can do put_zcopy. Every
     * rejected memory domain is removed from the candidate map, so the
     * search either succeeds or runs out of candidates. */
    for (;;) {
        dn_lane = ucp_config_find_domain_lane(config, config->key.domain_lanes,
                                              dn_md_map);
        if (dn_lane == UCP_NULL_LANE) {
            ucs_error("Not find address domain lane.");
            return UCS_ERR_IO_ERROR;
        }

        rsc_index  = ucp_ep_get_rsc_index(ep, dn_lane);
        iface_attr = &worker->ifaces[rsc_index].attr;
        /* NOTE(review): dst_md_index is used below to index the local
         * context->tl_mds array - confirm this is the intended index. */
        md_index   = config->key.lanes[dn_lane].dst_md_index;

        if (iface_attr->cap.flags & UCT_IFACE_FLAG_PUT_ZCOPY) {
            break;
        }

        /* Clear (not set) the bit of the unsuitable memory domain */
        dn_md_map &= ~UCS_BIT(md_index);
    }

    status = uct_md_mem_reg(context->tl_mds[md_index].md, buffer, buffer_size,
                            UCT_MD_MEM_ACCESS_REMOTE_PUT, &memh);
    if (status != UCS_OK) {
        ucs_error("Failed to reg address %p with md %s", buffer,
                  context->tl_mds[md_index].rsc.md_name);
        return status;
    }

    /* Single contiguous source element; all fields are initialized */
    iov.buffer = (void *)recv_data;
    iov.length = recv_length;
    iov.count  = 1;
    iov.stride = 0;
    iov.memh   = UCT_MEM_HANDLE_NULL;

    /* NOTE(review): no completion object is passed, so any status other
     * than UCS_OK (including an in-progress return) is treated as failure. */
    status = uct_ep_put_zcopy(ep->uct_eps[dn_lane], &iov, 1, (uint64_t)buffer,
                              (uct_rkey_t)memh, NULL);
    if (status != UCS_OK) {
        uct_md_mem_dereg(context->tl_mds[md_index].md, memh);
        ucs_error("Failed to perform uct_ep_put_zcopy to address %p", recv_data);
        return status;
    }

    dereg_status = uct_md_mem_dereg(context->tl_mds[md_index].md, memh);
    if (dereg_status != UCS_OK) {
        ucs_error("Failed to dereg address %p with md %s", buffer,
                  context->tl_mds[md_index].rsc.md_name);
        return dereg_status;
    }

    return UCS_OK;
}
|
||
|
||
ucs_status_t ucp_dt_unpack(ucp_request_t *req, ucp_datatype_t datatype, void *buffer, size_t buffer_size, | ||
ucp_dt_state_t *state, const void *recv_data, size_t recv_length, int last) | ||
{ | ||
ucp_dt_generic_t *dt_gen; | ||
size_t offset = state->offset; | ||
ucs_status_t status; | ||
|
||
if (ucs_unlikely((recv_length + offset) > buffer_size)) { | ||
ucs_trace_req("message truncated: recv_length %zu offset %zu buffer_size %zu", | ||
recv_length, offset, buffer_size); | ||
if (UCP_DT_IS_GENERIC(datatype) && last) { | ||
ucp_dt_generic(datatype)->ops.finish(state->dt.generic.state); | ||
} | ||
return UCS_ERR_MESSAGE_TRUNCATED; | ||
} | ||
|
||
switch (datatype & UCP_DATATYPE_CLASS_MASK) { | ||
case UCP_DATATYPE_CONTIG: | ||
if (ucs_likely(UCP_IS_DEFAULT_ADDR_DOMAIN(req->addr_dn_h))) { | ||
UCS_PROFILE_NAMED_CALL("memcpy_recv", memcpy, buffer + offset, | ||
recv_data, recv_length); | ||
return UCS_OK; | ||
} else { | ||
return ucp_dn_dt_unpack(req, buffer, buffer_size, recv_data, recv_length); | ||
} | ||
|
||
case UCP_DATATYPE_IOV: | ||
UCS_PROFILE_CALL(ucp_dt_iov_scatter, buffer, state->dt.iov.iovcnt, | ||
recv_data, recv_length, &state->dt.iov.iov_offset, | ||
&state->dt.iov.iovcnt_offset); | ||
return UCS_OK; | ||
|
||
case UCP_DATATYPE_GENERIC: | ||
dt_gen = ucp_dt_generic(datatype); | ||
status = UCS_PROFILE_NAMED_CALL("dt_unpack", dt_gen->ops.unpack, | ||
state->dt.generic.state, offset, | ||
recv_data, recv_length); | ||
if (last) { | ||
UCS_PROFILE_NAMED_CALL_VOID("dt_finish", dt_gen->ops.finish, | ||
state->dt.generic.state); | ||
} | ||
return status; | ||
|
||
default: | ||
ucs_error("unexpected datatype=%lx", datatype); | ||
return UCS_ERR_INVALID_PARAM; | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -731,7 +731,7 @@ UCS_PROFILE_FUNC(ucs_status_t, ucp_rndv_data_handler, | |
} | ||
|
||
UCS_PROFILE_REQUEST_EVENT(rreq, "rndv_data_recv", recv_len); | ||
status = ucp_dt_unpack(rreq->recv.datatype, rreq->recv.buffer, | ||
status = ucp_dt_unpack(rreq, rreq->recv.datatype, rreq->recv.buffer, | ||
rreq->recv.length, &rreq->recv.state, | ||
data + hdr_len, recv_len, 0); | ||
if ((status == UCS_OK) || (status == UCS_INPROGRESS)) { | ||
|
@@ -764,9 +764,9 @@ UCS_PROFILE_FUNC(ucs_status_t, ucp_rndv_data_last_handler, | |
/* Check that total received length matches RTS->length */ | ||
ucs_assert(rreq->recv.info.length == rreq->recv.state.offset + recv_len); | ||
UCS_PROFILE_REQUEST_EVENT(rreq, "rndv_data_last_recv", recv_len); | ||
status = ucp_dt_unpack(rreq->recv.datatype, rreq->recv.buffer, | ||
rreq->recv.length, &rreq->recv.state, | ||
data + hdr_len, recv_len, 1); | ||
status = ucp_dt_unpack(rreq, rreq->recv.datatype, rreq->recv.buffer, | ||
rreq->recv.length, &rreq->recv.state, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. alignment |
||
data + hdr_len, recv_len, 1); | ||
} else { | ||
ucs_trace_data("drop last segment for rreq %p, length %zu, status %s", | ||
rreq, recv_len, ucs_status_string(rreq->status)); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
thrshold -> threshold