Skip to content

Commit

Permalink
mpl/ze: add an option to do GPU copy with and without round robin
Browse files Browse the repository at this point in the history
Specify whether command queues are used in round-robin order or not.
  • Loading branch information
zhenggb72 authored and abrooks98 committed Jul 20, 2023
1 parent fcd7c8e commit 3b9cd40
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 13 deletions.
12 changes: 12 additions & 0 deletions src/mpi/init/mpir_init.c
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,17 @@
description : >-
If true, mpl/ze will use immediate command list for copying
- name : MPIR_CVAR_GPU_ROUND_ROBIN_COMMAND_QUEUES
category : GPU
type : boolean
default : false
class : none
verbosity : MPI_T_VERBOSITY_USER_BASIC
scope : MPI_T_SCOPE_ALL_EQ
description : >-
If true, mpl/ze will use command queues in a round-robin fashion.
If false, only command queues of index 0 will be used.
- name : MPIR_CVAR_NO_COLLECTIVE_FINALIZE
category : COLLECTIVE
type : boolean
Expand Down Expand Up @@ -225,6 +236,7 @@ int MPII_Init_thread(int *argc, char ***argv, int user_required, int *provided,

MPL_gpu_info.specialized_cache = specialized_cache;
MPL_gpu_info.use_immediate_cmdlist = MPIR_CVAR_GPU_USE_IMMEDIATE_COMMAND_LIST;
MPL_gpu_info.roundrobin_cmdq = MPIR_CVAR_GPU_ROUND_ROBIN_COMMAND_QUEUES;

int mpl_errno = MPL_gpu_init(debug_summary);
MPIR_ERR_CHKANDJUMP(mpl_errno != MPL_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**gpu_init");
Expand Down
1 change: 1 addition & 0 deletions src/mpl/include/mpl_gpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ typedef struct {
/* Input */
int debug_summary;
bool use_immediate_cmdlist;
bool roundrobin_cmdq;
/* Output */
bool enable_ipc;
MPL_gpu_ipc_handle_type_t ipc_handle_type;
Expand Down
40 changes: 27 additions & 13 deletions src/mpl/src/gpu/mpl_gpu_ze.c
Original file line number Diff line number Diff line change
Expand Up @@ -699,7 +699,7 @@ static int mmapFunction(int nfds, int *fds, size_t size, void **ptr)
if (*ptr == (void *) -1) {
mpl_err = MPL_ERR_GPU_INTERNAL;
perror("mmap device to host");
printf("gdr_handle_open failed fd: %d\n", fds[0]);
printf("mmap failed fd: %d size: %ld\n", fds[0], size);
goto fn_fail;
}
} else {
Expand Down Expand Up @@ -2058,13 +2058,20 @@ static inline int get_immediate_cmdlist(int *dev, MPL_gpu_copy_direction_t dir,
MPL_ze_engine_entry_t *engine_state = device_state->engines + engine;

if (dir == MPL_GPU_COPY_DIRECTION_NONE) {
index = engine_state->curQueue;
/* move to next queue */
engine_state->curQueue++;
if (engine_state->curQueue == engine_state->numQueues)
engine_state->curQueue = 0;
if (MPL_gpu_info.roundrobin_cmdq) {
index = engine_state->curQueue;
/* move to next queue */
engine_state->curQueue++;
if (engine_state->curQueue == engine_state->numQueues)
engine_state->curQueue = 0;
} else {
index = 0;
}
} else {
index = dir % engine_state->numQueues;
if (MPL_gpu_info.roundrobin_cmdq)
index = dir % engine_state->numQueues;
else
index = 0;
}

if (!engine_state->cmdlists[index]) {
Expand Down Expand Up @@ -2168,13 +2175,20 @@ static int MPL_gpu_imemcpy_normal(void *dest_ptr, void *src_ptr, size_t size, in
ZE_ERR_CHECK(ret);
int q_index;
if (dir == MPL_GPU_COPY_DIRECTION_NONE) {
q_index = engine_state->curQueue;
/* move to next queue */
engine_state->curQueue++;
if (engine_state->curQueue == engine_state->numQueues)
engine_state->curQueue = 0;
if (MPL_gpu_info.roundrobin_cmdq) {
q_index = engine_state->curQueue;
/* move to next queue */
engine_state->curQueue++;
if (engine_state->curQueue == engine_state->numQueues)
engine_state->curQueue = 0;
} else {
q_index = 0;
}
} else {
q_index = dir % engine_state->numQueues;
if (MPL_gpu_info.roundrobin_cmdq)
q_index = dir % engine_state->numQueues;
else
q_index = 0;
}
assert(engine_state->cmdQueues);
ze_command_queue_handle_t cmdq = engine_state->cmdQueues[q_index];
Expand Down

0 comments on commit 3b9cd40

Please sign in to comment.