diff --git a/op2/src/core/op_lib_core.cpp b/op2/src/core/op_lib_core.cpp index 265107436..a5ef2199c 100644 --- a/op2/src/core/op_lib_core.cpp +++ b/op2/src/core/op_lib_core.cpp @@ -476,13 +476,15 @@ op_dat op_decl_dat_core(op_set set, int dim, char const *type, int size, memcpy(new_data, data, (size_t)dim * (size_t)size * (size_t)set->size * sizeof(char)); dat->data = new_data; - } - else { - if (data != NULL) + dat->user_managed = 0; + } else { + if (data != NULL) { dat->data = data; - else { - char *new_data = (char *)op_malloc(bytes); + dat->user_managed = 1; + } else { + char *new_data = (char *)op_calloc(bytes, sizeof(char)); dat->data = new_data; + dat->user_managed = 0; } } @@ -490,7 +492,6 @@ op_dat op_decl_dat_core(op_set set, int dim, char const *type, int size, dat->name = copy_str(name); dat->type = copy_str(type); dat->size = dim * size; - dat->user_managed = 1; dat->mpi_buffer = NULL; dat->buffer_d = NULL; dat->buffer_d_r = NULL; @@ -508,14 +509,13 @@ op_dat op_decl_dat_core(op_set set, int dim, char const *type, int size, exit(-1); } item->dat = dat; - /*if (data == NULL) { -- this check would be good to have for Hydra, - but temp_dats prints this error .. so commented out - for now - printf("WARNING data pointer is NULL for %s!\n", name); - }*/ - item->orig_ptr = data; - // printf("orig_ptr for dat %s = %p\n", name, data); - // add item to the end of the list + + if (data != NULL) { + item->orig_ptr = data; + } else { + item->orig_ptr = dat->data; + } + if (TAILQ_EMPTY(&OP_dat_list)) { TAILQ_INSERT_HEAD(&OP_dat_list, item, entries); } else { @@ -1313,7 +1313,8 @@ void set_maps_base(int base) { } void *op_malloc(size_t size) { - return aligned_alloc(OP2_ALIGNMENT, size); + if (size == 0) return malloc(0); + return aligned_alloc(OP2_ALIGNMENT, (size + OP2_ALIGNMENT) - 1 & (-OP2_ALIGNMENT)); } // malloc to be exposed in Fortran API for use with Cray pointers diff --git a/op2/src/cuda/op_cuda_decl.cpp b/op2/src/cuda/op_cuda_decl.cpp index f6dd09d2b..3f6b4b98c 100644 --- a/op2/src/cuda/op_cuda_decl.cpp +++ b/op2/src/cuda/op_cuda_decl.cpp @@ -122,23 +122,7 @@ op_dat op_decl_dat_overlay_ptr(op_set set, char *dat) { op_dat op_decl_dat_temp_char(op_set set, int dim, char const *type, int size, char const *name) { - char *data = NULL; - op_dat dat = op_decl_dat_temp_core(set, dim, type, size, data, name); - - op_dat_entry *item; - op_dat_entry *tmp_item; - for (item = TAILQ_FIRST(&OP_dat_list); item != NULL; item = tmp_item) { - tmp_item = TAILQ_NEXT(item, entries); - - if (item->dat == dat) { - item->orig_ptr = (char *)dat->data; - break; - } - } - - for (size_t i = 0; i < set->size * dim * size; i++) - dat->data[i] = 0; - dat->user_managed = 0; + op_dat dat = op_decl_dat_temp_core(set, dim, type, size, NULL, name); size_t set_size = dat->set->size + dat->set->exec_size + dat->set->nonexec_size; if (strstr(dat->type, ":soa") != NULL || (OP_auto_soa && dat->dim > 1)) { @@ -149,7 +133,6 @@ op_dat op_decl_dat_temp_char(op_set set, int dim, char const *type, int size, op_deviceZero(dat->data_d, (size_t)(dat->size) * set_size); } - return dat; } diff --git a/op2/src/mpi/op_mpi_cuda_decl.cpp b/op2/src/mpi/op_mpi_cuda_decl.cpp index cf25d522f..ed20b8bee 100644 --- a/op2/src/mpi/op_mpi_cuda_decl.cpp +++ b/op2/src/mpi/op_mpi_cuda_decl.cpp @@ -119,16 +119,16 @@ op_dat op_decl_dat_char(op_set set, int dim, char const *type, int size, op_dat op_decl_dat_overlay(op_set set, op_dat dat) { op_dat overlay_dat = op_decl_dat_overlay_core(set, dat); - int halo_size = OP_import_exec_list[set->index]->size + - OP_import_nonexec_list[set->index]->size; - op_mpi_buffer mpi_buf = (op_mpi_buffer)xmalloc(sizeof(op_mpi_buffer_core)); halo_list exec_e_list = OP_export_exec_list[set->index]; halo_list nonexec_e_list = OP_export_nonexec_list[set->index]; - mpi_buf->buf_exec = (char *)xmalloc((exec_e_list->size) * overlay_dat->size); - mpi_buf->buf_nonexec = (char *)xmalloc((nonexec_e_list->size) * overlay_dat->size); + mpi_buf->buf_exec = (char *)xmalloc((size_t)(exec_e_list->size) * (size_t)overlay_dat->size); + + size_t import_extra = OP_partial_exchange ? set_import_buffer_size[set->index] : 0; + mpi_buf->buf_nonexec = (char *)xmalloc(((size_t)(nonexec_e_list->size) + import_extra) + * (size_t)overlay_dat->size); halo_list exec_i_list = OP_import_exec_list[set->index]; halo_list nonexec_i_list = OP_import_nonexec_list[set->index]; @@ -170,37 +170,19 @@ op_dat op_decl_dat_overlay_ptr(op_set set, char *dat) { op_dat op_decl_dat_temp_char(op_set set, int dim, char const *type, int size, char const *name) { - char *data = NULL; - op_dat dat = op_decl_dat_temp_core(set, dim, type, size, data, name); - - op_dat_entry *item; - op_dat_entry *tmp_item; - for (item = TAILQ_FIRST(&OP_dat_list); item != NULL; item = tmp_item) { - tmp_item = TAILQ_NEXT(item, entries); - - if (item->dat == dat) { - item->orig_ptr = (char *)dat->data; - break; - } - } + op_dat dat = op_decl_dat_temp_core(set, dim, type, size, NULL, name); // create empty data block to assign to this temporary dat (including the // halos) - size_t set_size = (size_t)set->size + (size_t)OP_import_exec_list[set->index]->size + - (size_t)OP_import_nonexec_list[set->index]->size; - - // initialize data bits to 0 - for (size_t i = 0; i < set_size * (size_t)dim * (size_t)size; i++) - dat->data[i] = 0; - - dat->user_managed = 0; + size_t set_size = (size_t)set->size + (size_t)OP_import_exec_list[set->index]->size + + (size_t)OP_import_nonexec_list[set->index]->size; // transpose if (strstr(dat->type, ":soa") != NULL || (OP_auto_soa && dat->dim > 1)) { cutilSafeCall( op_deviceMalloc((void **)&(dat->buffer_d_r), - (size_t)dat->size * (OP_import_exec_list[set->index]->size + - OP_import_nonexec_list[set->index]->size))); + (size_t)dat->size * ((size_t)OP_import_exec_list[set->index]->size + + (size_t)OP_import_nonexec_list[set->index]->size))); op_deviceMalloc((void **)&(dat->data_d), (size_t)(dat->size) * round32(set_size)); op_deviceZero(dat->data_d, (size_t)(dat->size) * round32(set_size)); @@ -212,14 +194,16 @@ op_dat op_decl_dat_temp_char(op_set set, int dim, char const *type, int size, // need to allocate mpi_buffers for this new temp_dat op_mpi_buffer mpi_buf = (op_mpi_buffer)xmalloc(sizeof(op_mpi_buffer_core)); - halo_list exec_e_list = OP_export_exec_list[set->index]; - halo_list nonexec_e_list = OP_export_nonexec_list[set->index]; + halo_list exec_e_list = OP_export_exec_list[dat->set->index]; + halo_list nonexec_e_list = OP_export_nonexec_list[dat->set->index]; - mpi_buf->buf_exec = (char *)xmalloc((exec_e_list->size) * (size_t)dat->size); - mpi_buf->buf_nonexec = (char *)xmalloc((nonexec_e_list->size) * (size_t)dat->size); + mpi_buf->buf_exec = (char *)xmalloc((size_t)(exec_e_list->size) * (size_t)dat->size); - halo_list exec_i_list = OP_import_exec_list[set->index]; - halo_list nonexec_i_list = OP_import_nonexec_list[set->index]; + size_t import_extra = OP_partial_exchange ? set_import_buffer_size[set->index] : 0; + mpi_buf->buf_nonexec = (char *)xmalloc(((size_t)(nonexec_e_list->size) + import_extra) * (size_t)dat->size); + + halo_list exec_i_list = OP_import_exec_list[dat->set->index]; + halo_list nonexec_i_list = OP_import_nonexec_list[dat->set->index]; mpi_buf->s_req = (MPI_Request *)xmalloc( sizeof(MPI_Request) * @@ -230,14 +214,13 @@ op_dat op_decl_dat_temp_char(op_set set, int dim, char const *type, int size, mpi_buf->s_num_req = 0; mpi_buf->r_num_req = 0; - dat->mpi_buffer = mpi_buf; // need to allocate device buffers for mpi comms for this new temp_dat - cutilSafeCall( - op_deviceMalloc((void **)&(dat->buffer_d), - (size_t)dat->size * (OP_export_exec_list[set->index]->size + - OP_export_nonexec_list[set->index]->size))); + cutilSafeCall(op_deviceMalloc((void **)&(dat->buffer_d), + (size_t)dat->size * (OP_export_exec_list[set->index]->size + + OP_export_nonexec_list[set->index]->size + + set_import_buffer_size[set->index]))); return dat; } @@ -288,7 +271,7 @@ size_t op_mv_halo_device(op_set set, op_dat dat) { cutilSafeCall( op_deviceMalloc((void **)&(dat->buffer_d_r), (size_t)dat->size * (OP_import_exec_list[set->index]->size + - OP_import_nonexec_list[set->index]->size))); + OP_import_nonexec_list[set->index]->size))); total_size += (size_t)dat->size * (OP_import_exec_list[set->index]->size + OP_import_nonexec_list[set->index]->size); @@ -304,8 +287,8 @@ size_t op_mv_halo_device(op_set set, op_dat dat) { cutilSafeCall( op_deviceMalloc((void **)&(dat->buffer_d), (size_t)dat->size * (OP_export_exec_list[set->index]->size + - OP_export_nonexec_list[set->index]->size + - set_import_buffer_size[set->index]))); + OP_export_nonexec_list[set->index]->size + + set_import_buffer_size[set->index]))); total_size += (size_t)dat->size * (OP_export_exec_list[set->index]->size + OP_export_nonexec_list[set->index]->size + diff --git a/op2/src/mpi/op_mpi_decl.cpp b/op2/src/mpi/op_mpi_decl.cpp index 59b243335..b8232ee4d 100644 --- a/op2/src/mpi/op_mpi_decl.cpp +++ b/op2/src/mpi/op_mpi_decl.cpp @@ -132,8 +132,11 @@ op_dat op_decl_dat_overlay(op_set set, op_dat dat) { halo_list exec_e_list = OP_export_exec_list[set->index]; halo_list nonexec_e_list = OP_export_nonexec_list[set->index]; - mpi_buf->buf_exec = (char *)xmalloc((exec_e_list->size) * overlay_dat->size); - mpi_buf->buf_nonexec = (char *)xmalloc((nonexec_e_list->size) * overlay_dat->size); + mpi_buf->buf_exec = (char *)xmalloc((size_t)(exec_e_list->size) * (size_t)overlay_dat->size); + + size_t import_extra = OP_partial_exchange ? set_import_buffer_size[set->index] : 0; + mpi_buf->buf_nonexec = (char *)xmalloc(((size_t)(nonexec_e_list->size) + import_extra) + * (size_t)overlay_dat->size); halo_list exec_i_list = OP_import_exec_list[set->index]; halo_list nonexec_i_list = OP_import_nonexec_list[set->index]; @@ -175,42 +178,26 @@ op_dat op_decl_dat_overlay_ptr(op_set set, char *dat) { op_dat op_decl_dat_temp_char(op_set set, int dim, char const *type, int size, char const *name) { - char *d = NULL; - op_dat dat = op_decl_dat_temp_core(set, dim, type, size, d, name); - - op_dat_entry *item; - op_dat_entry *tmp_item; - for (item = TAILQ_FIRST(&OP_dat_list); item != NULL; item = tmp_item) { - tmp_item = TAILQ_NEXT(item, entries); - - if (item->dat == dat) { - item->orig_ptr = (char *)dat->data; - break; - } - } + op_dat dat = op_decl_dat_temp_core(set, dim, type, size, NULL, name); // create empty data block to assign to this temporary dat (including the // halos) - int halo_size = OP_import_exec_list[set->index]->size + - OP_import_nonexec_list[set->index]->size; - - // initialize data bits to 0 - //dat->data = (char *)calloc((set->size + halo_size) * dim * size, 1); - for (size_t i = 0; i < (set->size + halo_size) * dim * size; i++) - dat->data[i] = 0; - dat->user_managed = 0; + size_t set_size = (size_t)set->size + (size_t)OP_import_exec_list[set->index]->size + + (size_t)OP_import_nonexec_list[set->index]->size; // need to allocate mpi_buffers for this new temp_dat op_mpi_buffer mpi_buf = (op_mpi_buffer)xmalloc(sizeof(op_mpi_buffer_core)); - halo_list exec_e_list = OP_export_exec_list[set->index]; - halo_list nonexec_e_list = OP_export_nonexec_list[set->index]; + halo_list exec_e_list = OP_export_exec_list[dat->set->index]; + halo_list nonexec_e_list = OP_export_nonexec_list[dat->set->index]; - mpi_buf->buf_exec = (char *)xmalloc((exec_e_list->size) * dat->size); - mpi_buf->buf_nonexec = (char *)xmalloc((nonexec_e_list->size) * dat->size); + mpi_buf->buf_exec = (char *)xmalloc((size_t)(exec_e_list->size) * (size_t)dat->size); - halo_list exec_i_list = OP_import_exec_list[set->index]; - halo_list nonexec_i_list = OP_import_nonexec_list[set->index]; + size_t import_extra = OP_partial_exchange ? set_import_buffer_size[set->index] : 0; + mpi_buf->buf_nonexec = (char *)xmalloc(((size_t)(nonexec_e_list->size) + import_extra) * (size_t)dat->size); + + halo_list exec_i_list = OP_import_exec_list[dat->set->index]; + halo_list nonexec_i_list = OP_import_nonexec_list[dat->set->index]; mpi_buf->s_req = (MPI_Request *)xmalloc( sizeof(MPI_Request) * @@ -221,7 +208,6 @@ op_dat op_decl_dat_temp_char(op_set set, int dim, char const *type, int size, mpi_buf->s_num_req = 0; mpi_buf->r_num_req = 0; - dat->mpi_buffer = mpi_buf; return dat; diff --git a/op2/src/openmp/op_openmp_decl.cpp b/op2/src/openmp/op_openmp_decl.cpp index c846b560f..50da56fda 100644 --- a/op2/src/openmp/op_openmp_decl.cpp +++ b/op2/src/openmp/op_openmp_decl.cpp @@ -102,24 +102,7 @@ op_dat op_decl_dat_overlay_ptr(op_set set, char *dat) { op_dat op_decl_dat_temp_char(op_set set, int dim, char const *type, int size, char const *name) { - char *data = NULL; - op_dat dat = op_decl_dat_temp_core(set, dim, type, size, data, name); - - op_dat_entry *item; - op_dat_entry *tmp_item; - for (item = TAILQ_FIRST(&OP_dat_list); item != NULL; item = tmp_item) { - tmp_item = TAILQ_NEXT(item, entries); - - if (item->dat == dat) { - item->orig_ptr = (char *)dat->data; - break; - } - } - - for (size_t i = 0; i < set->size * dim * size; i++) - dat->data[i] = 0; - dat->user_managed = 0; - return dat; + return op_decl_dat_temp_core(set, dim, type, size, NULL, name); } int op_free_dat_temp_char(op_dat dat) { return op_free_dat_temp_core(dat); } diff --git a/op2/src/openmp4/op_openmp4_decl.cpp b/op2/src/openmp4/op_openmp4_decl.cpp index 36c4bebf8..7a815575b 100644 --- a/op2/src/openmp4/op_openmp4_decl.cpp +++ b/op2/src/openmp4/op_openmp4_decl.cpp @@ -96,23 +96,7 @@ op_dat op_decl_dat_overlay_ptr(op_set set, char *dat) { op_dat op_decl_dat_temp_char(op_set set, int dim, char const *type, int size, char const *name) { - char *data = NULL; - op_dat dat = op_decl_dat_temp_core(set, dim, type, size, data, name); - - op_dat_entry *item; - op_dat_entry *tmp_item; - for (item = TAILQ_FIRST(&OP_dat_list); item != NULL; item = tmp_item) { - tmp_item = TAILQ_NEXT(item, entries); - - if (item->dat == dat) { - item->orig_ptr = (char *)dat->data; - break; - } - } - - for (size_t i = 0; i < set->size * dim * size; i++) - dat->data[i] = 0; - dat->user_managed = 0; + op_dat dat = op_decl_dat_temp_core(set, dim, type, size, NULL, name); // transpose data if (strstr(type, ":soa") != NULL || (OP_auto_soa && dim > 1)) { diff --git a/op2/src/sequential/op_seq.cpp b/op2/src/sequential/op_seq.cpp index 89cf15932..059a5fd9d 100644 --- a/op2/src/sequential/op_seq.cpp +++ b/op2/src/sequential/op_seq.cpp @@ -109,24 +109,7 @@ int op_free_dat_temp_char(op_dat dat) { return op_free_dat_temp_core(dat); } op_dat op_decl_dat_temp_char(op_set set, int dim, char const *type, int size, char const *name) { - char *data = NULL; - op_dat dat = op_decl_dat_temp_core(set, dim, type, size, data, name); - - op_dat_entry *item; - op_dat_entry *tmp_item; - for (item = TAILQ_FIRST(&OP_dat_list); item != NULL; item = tmp_item) { - tmp_item = TAILQ_NEXT(item, entries); - - if (item->dat == dat) { - item->orig_ptr = (char *)dat->data; - break; - } - } - - for (size_t i = 0; i < set->size * dim * size; i++) - dat->data[i] = 0; - dat->user_managed = 0; - return dat; + return op_decl_dat_temp_core(set, dim, type, size, NULL, name); } /*