Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

decl_hdf5 MPI-I/O Collective or Independent, fix #419 #502

Open
wants to merge 10 commits into
base: main
Choose a base branch
from
1 change: 1 addition & 0 deletions example/decl_hdf5.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ pdi:
write:
main_field: # the name of the data to write
dataset: data
mpio: COLLECTIVE # or INDEPENDENT
when: '$iter<10' # do only write the first 10 iterations (0...9)
memory_selection: # exclude ghosts from the data in memory
size: ['$dsize[0]-2', '$dsize[1]-2']
Expand Down
6 changes: 6 additions & 0 deletions plugins/decl_hdf5/AUTHORS
jmorice91 marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,12 @@ Julien Bigot - CEA ([email protected])
* Run tests that depend on the filesystem in their own temporary directory
* Buildsystem

Yacine Ould Rouis - CNRS ([email protected])
* contribution to feature design, validation

Jacques Morice - CEA ([email protected])
* contribution to feature improvement, validation

Thomas Padioleau - CEA ([email protected])
* Fixed a bug with parallel file deletion

Expand Down
2 changes: 2 additions & 0 deletions plugins/decl_hdf5/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
## [Unreleased]

### Added
* Possibility to choose parallel MPI-I/O mode: either COLLECTIVE or INDEPENDENT
[#419](https://gitlab.maisondelasimulation.fr/pdidev/pdi/-/issues/419)

### Changed

Expand Down
3 changes: 3 additions & 0 deletions plugins/decl_hdf5/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,8 @@ The possible values for the keys are as follow:
See
https://support.hdfgroup.org/HDF5/doc/RM/RM_H5P.html#Property-SetFletcher32
for more information.
* `mpio` : a string expression to define the type of MPI-I/O parallel pointer
for the operation among two choices : `COLLECTIVE` (default) and `INDEPENDENT`.

### SELECTION_DESC

Expand Down Expand Up @@ -219,6 +221,7 @@ plugins:
- dataset: data/array # a dataset name (default: the data name)
when: "$iter>0&$iter<11" # an additional condition when to actually trigger the actions (default: always true)
communicator: $MPI_COMM_SELF # the MPI communicator used for HDF5 parallel synchronized write (default: that of the file)
mpio: INDEPENDENT # the type of MPI-I/O parallel pointer (default: COLLECTIVE)
memory_selection:
size: [$width-2, $height-2] # number of elements to transfer in each dimension (default: size of the full data)
start: [1, 1] # coordinate of the start point in memory relative to the shared data (default: 0 in each dimensions)
Expand Down
18 changes: 15 additions & 3 deletions plugins/decl_hdf5/dataset_op.cxx
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*******************************************************************************
* Copyright (C) 2015-2024 Commissariat a l'energie atomique et aux energies alternatives (CEA)
* Copyright (C) 2021 Institute of Bioorganic Chemistry Polish Academy of Science (PSNC)
* Copyright (C) 2021-2022 Institute of Bioorganic Chemistry Polish Academy of Science (PSNC)
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
Expand Down Expand Up @@ -164,6 +164,14 @@ Dataset_op::Dataset_op(Direction dir, string name, Expression default_when, PC_t
m_fletcher = value;
} else if (key == "attributes") {
// pass
} else if (key == "mpio") {
if (to_string(value) == "INDEPENDENT") {
m_mpio = H5FD_MPIO_INDEPENDENT;
} else if (to_string(value) == "COLLECTIVE") {
m_mpio = H5FD_MPIO_COLLECTIVE;
} else {
throw Config_error{key_tree, "Not valid mpio value: `{}'. Expecting INDEPENDENT or COLLECTIVE.", to_string(value)};
}
} else if (key == "collision_policy") {
m_collision_policy = to_collision_policy(to_string(value));
} else {
Expand Down Expand Up @@ -203,8 +211,12 @@ void Dataset_op::fletcher(Context& ctx, Expression value)
}
}

void Dataset_op::execute(Context& ctx, hid_t h5_file, hid_t xfer_lst, const unordered_map<string, Datatype_template_sptr>& dsets)
void Dataset_op::execute(Context& ctx, hid_t h5_file, bool use_mpio, const unordered_map<string, Datatype_template_sptr>& dsets)
{
Raii_hid xfer_lst = make_raii_hid(H5Pcreate(H5P_DATASET_XFER), H5Pclose);
if (use_mpio) {
if (0 > H5Pset_dxpl_mpio(xfer_lst, m_mpio)) handle_hdf5_err();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My personal opinion: I think that all if/else blocks should use curly braces, even if there is only 1 line.

Suggested change
if (0 > H5Pset_dxpl_mpio(xfer_lst, m_mpio)) handle_hdf5_err();
if (0 > H5Pset_dxpl_mpio(xfer_lst, m_mpio)) {
handle_hdf5_err();
}

}
if (m_direction == READ)
do_read(ctx, h5_file, xfer_lst);
else
Comment on lines 220 to 222
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if (m_direction == READ)
do_read(ctx, h5_file, xfer_lst);
else
if (m_direction == READ) {
do_read(ctx, h5_file, xfer_lst);
} else {

Expand Down Expand Up @@ -328,7 +340,7 @@ hid_t Dataset_op::dataset_creation_plist(Context& ctx, const Datatype* dataset_t
return dset_plist;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
return dset_plist;
return dset_plist;
}

}

void Dataset_op::do_write(Context& ctx, hid_t h5_file, hid_t write_lst, const unordered_map<string, PDI::Datatype_template_sptr>& dsets)
void Dataset_op::do_write(Context& ctx, hid_t h5_file, hid_t write_lst, const unordered_map<string, Datatype_template_sptr>& dsets)
{
string dataset_name = m_dataset.to_string(ctx);
ctx.logger().trace("Preparing for writing `{}' dataset", dataset_name);
Expand Down
10 changes: 6 additions & 4 deletions plugins/decl_hdf5/dataset_op.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*******************************************************************************
* Copyright (C) 2015-2024 Commissariat a l'energie atomique et aux energies alternatives (CEA)
* Copyright (C) 2021 Institute of Bioorganic Chemistry Polish Academy of Science (PSNC)
* Copyright (C) 2021-2022 Institute of Bioorganic Chemistry Polish Academy of Science (PSNC)
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
Expand Down Expand Up @@ -67,6 +67,9 @@ class Dataset_op
/// direction of the transfer (read or write)
Direction m_direction;

/// the type of MPI-I/O parallel pointer (default = COLLECTIVE)
H5FD_mpio_xfer_t m_mpio = H5FD_MPIO_COLLECTIVE;

/// the name of the dataset where to transfer
PDI::Expression m_dataset;

Expand Down Expand Up @@ -182,11 +185,10 @@ class Dataset_op
*
* \param ctx the context in which to operate
* \param h5_file the already opened HDF5 file id
* \param xfer_lst the already created transfer property list including any
* parallel HDF5 required property.
* \param use_mpio whether the hdf5 read/write is parallel
* \param dsets the type of the explicitly typed datasets
*/
void execute(PDI::Context& ctx, hid_t h5_file, hid_t xfer_lst, const std::unordered_map<std::string, PDI::Datatype_template_sptr>& dsets);
void execute(PDI::Context& ctx, hid_t h5_file, bool use_mpio, const std::unordered_map<std::string, PDI::Datatype_template_sptr>& dsets);

private:
void do_read(PDI::Context& ctx, hid_t h5_file, hid_t read_lst);
Expand Down
10 changes: 5 additions & 5 deletions plugins/decl_hdf5/file_op.cxx
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*******************************************************************************
* Copyright (C) 2015-2024 Commissariat a l'energie atomique et aux energies alternatives (CEA)
* Copyright (C) 2021 Institute of Bioorganic Chemistry Polish Academy of Science (PSNC)
* Copyright (C) 2021-2022 Institute of Bioorganic Chemistry Polish Academy of Science (PSNC)
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
Expand Down Expand Up @@ -280,15 +280,15 @@ void File_op::execute(Context& ctx)
std::string filename = m_file.to_string(ctx);

Raii_hid file_lst = make_raii_hid(H5Pcreate(H5P_FILE_ACCESS), H5Pclose);
Raii_hid xfer_lst = make_raii_hid(H5Pcreate(H5P_DATASET_XFER), H5Pclose);
bool use_mpio = false;
#ifdef H5_HAVE_PARALLEL
MPI_Comm comm = MPI_COMM_SELF;
if (communicator()) {
comm = *(static_cast<const MPI_Comm*>(Ref_r{communicator().to_ref(ctx)}.get()));
}
if (comm != MPI_COMM_SELF) {
if (0 > H5Pset_fapl_mpio(file_lst, comm, MPI_INFO_NULL)) handle_hdf5_err();
if (0 > H5Pset_dxpl_mpio(xfer_lst, H5FD_MPIO_COLLECTIVE)) handle_hdf5_err();
use_mpio = true;
ctx.logger().debug("Opening `{}' file in parallel mode", filename);
}
#endif
Expand Down Expand Up @@ -337,10 +337,10 @@ void File_op::execute(Context& ctx)
Raii_hid h5_file = make_raii_hid(h5_file_raw, H5Fclose, ("Cannot open `" + filename + "' file").c_str());

for (auto&& one_dset_op: dset_writes) {
one_dset_op.execute(ctx, h5_file, xfer_lst, m_datasets);
one_dset_op.execute(ctx, h5_file, use_mpio, m_datasets);
}
for (auto&& one_dset_op: dset_reads) {
one_dset_op.execute(ctx, h5_file, xfer_lst, m_datasets);
one_dset_op.execute(ctx, h5_file, use_mpio, m_datasets);
}
for (auto&& one_attr_op: attr_writes) {
one_attr_op.execute(ctx, h5_file);
Expand Down
13 changes: 11 additions & 2 deletions plugins/decl_hdf5/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#=============================================================================
# Copyright (C) 2015-2024 Commissariat a l'energie atomique et aux energies alternatives (CEA)
# Copyright (C) 2021 Institute of Bioorganic Chemistry Polish Academy of Science (PSNC)
#
# Copyright (C) 2022 Centre National de Recherche Scientifique (CNRS)
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
Expand Down Expand Up @@ -95,7 +95,7 @@ if("${BUILD_HDF5_PARALLEL}")
set_property(TEST decl_hdf5_mpi_04_C PROPERTY PROCESSORS 4)
endif()

# bcommunicator as a reference
# communicator as a reference
if("${BUILD_HDF5_PARALLEL}")
add_executable(decl_hdf5_mpi_05_C decl_hdf5_mpi_test_05.c)
target_link_libraries(decl_hdf5_mpi_05_C PDI::PDI_C MPI::MPI_C)
Expand All @@ -113,6 +113,15 @@ if("${BUILD_HDF5_PARALLEL}")
set_property(TEST decl_hdf5_mpi_06_C PROPERTY PROCESSORS 4)
endif()

# mpio dataset attribute
if("${BUILD_HDF5_PARALLEL}")
add_executable(decl_hdf5_mpi_07_C decl_hdf5_mpi_test_07.c)
target_link_libraries(decl_hdf5_mpi_07_C PDI::PDI_C MPI::MPI_C)
add_test(NAME decl_hdf5_mpi_07_C COMMAND "${RUNTEST_DIR}" "${MPIEXEC}" "${MPIEXEC_NUMPROC_FLAG}" 4 ${MPIEXEC_PREFLAGS} "$<TARGET_FILE:decl_hdf5_mpi_07_C>" ${MPIEXEC_POSTFLAGS})
set_property(TEST decl_hdf5_mpi_07_C PROPERTY TIMEOUT 15)
set_property(TEST decl_hdf5_mpi_07_C PROPERTY PROCESSORS 4)
endif()

add_executable(decl_hdf5_IO_options_C decl_hdf5_test_IO_options.c)
target_link_libraries(decl_hdf5_IO_options_C PDI::PDI_C ${HDF5_DEPS})
add_test(NAME decl_hdf5_IO_options_C COMMAND "${RUNTEST_DIR}" "$<TARGET_FILE:decl_hdf5_IO_options_C>")
Expand Down
6 changes: 3 additions & 3 deletions plugins/decl_hdf5/tests/compatibility_tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ set(RUNTEST_DIR "${CMAKE_CURRENT_LIST_DIR}/../../cmake/runtest-dir")

set(ALL_TEST_NAMES array dataset group struct variables)
if("${BUILD_HDF5_PARALLEL}")
list(APPEND ALL_TEST_NAMES mpi)
list(APPEND ALL_TEST_NAMES mpi mpi_independent)
endif()

foreach(test_name ${ALL_TEST_NAMES})
Expand All @@ -39,7 +39,7 @@ foreach(test_name ${ALL_TEST_NAMES})

add_executable("PDI_C_${test_name}_${io_operation}_test" "PDI_C/${test_name}_${io_operation}_test.c")
target_link_libraries("PDI_C_${test_name}_${io_operation}_test" PDI::PDI_C)
if ("${test_name}" STREQUAL "mpi")
if ( ("${test_name}" STREQUAL "mpi") OR ("${test_name}" STREQUAL "mpi_independent") )
target_link_libraries("HDF5_C_${test_name}_${io_operation}_test" MPI::MPI_C)
target_link_libraries("PDI_C_${test_name}_${io_operation}_test" MPI::MPI_C)
endif()
Expand All @@ -49,7 +49,7 @@ foreach(test_name ${ALL_TEST_NAMES})
foreach(read_prefix HDF5_C_ PDI_C_)
set(write_test_name "${write_prefix}${test_name}_write_test")
set(read_test_name "${read_prefix}${test_name}_read_test")
if ("${test_name}" STREQUAL "mpi")
if ( ("${test_name}" STREQUAL "mpi") OR ("${test_name}" STREQUAL "mpi_independent") )
set(MPI_CASE_CMD "${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} 2 ${MPIEXEC_PREFLAGS}")
set(MPI_POST_CMD " ${MPIEXEC_POSTFLAGS}")
else()
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
/*******************************************************************************
* Copyright (C) 2020 Institute of Bioorganic Chemistry Polish Academy of Science (PSNC)
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of CEA nor the names of its contributors may be used to
* endorse or promote products derived from this software without specific
* prior written permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
******************************************************************************/

#include <mpi.h>
#include <assert.h>
#include <hdf5.h>
#include <unistd.h>

#define FILE "mpi_independent_test.h5"

/*
Test : Read a file using hdf5 parallel version with the option independent parallel pointer.
*/

int main(int argc, char* argv[])
{
printf("HDF5 mpi_independent_read_test started\n");
MPI_Init(&argc, &argv);
int mpi_rank;
MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank);
herr_t status = H5open();
if (status < 0) {
return 1;
}

hid_t fapl_id = H5Pcreate(H5P_FILE_ACCESS);
H5Pset_fapl_mpio(fapl_id, MPI_COMM_WORLD, MPI_INFO_NULL);
hid_t file_id = H5Fopen(FILE, H5F_ACC_RDONLY, fapl_id);
if (file_id < 0) {
return 1;
}

int dset_data[5][5];
for (int i = 0; i < 5; i++) {
for (int j = 0; j < 5; j++) {
dset_data[i][j] = 0;
}
}

hsize_t coords[2] = {5, 10};
hid_t dataspace_id = H5Screate_simple(2, coords, NULL);
if (dataspace_id < 0) {
return 1;
}

hid_t dataset_id = H5Dopen2(file_id, "array_data", H5P_DEFAULT);
if (dataset_id < 0) {
return 1;
}

hsize_t count[2] = {5, 5};
hsize_t stride[2] = {1, 1};
hsize_t dataset_offset[2] = {0, 5 * mpi_rank};
hsize_t memory_offset[2] = {0, 0};
hsize_t block[2] = {1, 1};

status = H5Sselect_hyperslab(dataspace_id, H5S_SELECT_SET, dataset_offset, stride, count, block);
if (status < 0) {
return 1;
}
hid_t memory_dataspace_id = H5Screate_simple(2, count, NULL);
if (memory_dataspace_id < 0) {
return 1;
}
status = H5Sselect_hyperslab(memory_dataspace_id, H5S_SELECT_SET, memory_offset, stride, count, block);
if (status < 0) {
return 1;
}

hid_t dxpl_id = H5Pcreate(H5P_DATASET_XFER);
H5Pset_dxpl_mpio(dxpl_id, H5FD_MPIO_INDEPENDENT);

status = H5Dread(dataset_id, H5T_NATIVE_INT, memory_dataspace_id, dataspace_id, dxpl_id, dset_data);
if (status < 0) {
return 1;
}

for (int i = 0; i < 5; i++) {
for (int j = 0; j < 5; j++) {
if (dset_data[i][j] != i * 10 + j + (5 * mpi_rank)) {
fprintf(stderr, "[%d][%d] %d != %d\n ", i, j, dset_data[i][j], i * 10 + j + (5 * mpi_rank));
return 1;
}
}
}

status = H5Dclose(dataset_id);
if (status < 0) {
return 1;
}
status = H5Pclose(dxpl_id);
if (status != 0) {
return status;
}
status = H5Sclose(memory_dataspace_id);
if (status < 0) {
return 1;
}
status = H5Sclose(dataspace_id);
if (status < 0) {
return 1;
}
status = H5Fclose(file_id);
if (file_id < 0) {
return 1;
}
if (status != 0) {
return status;
}
status = H5Pclose(fapl_id);
if (status != 0) {
return status;
}

H5close();
if (status < 0) {
return 1;
}
MPI_Finalize();

printf("[Rank: %d] HDF5 mpi_read_test finalized\n", mpi_rank);
return 0;
}
Loading