From 50987573ac6637dd337033d70b71b89f01eacc78 Mon Sep 17 00:00:00 2001 From: Scot Breitenfeld Date: Wed, 20 Nov 2024 23:09:38 -0600 Subject: [PATCH 1/2] fixed HDF5_VOL_CONNECTOR settings for the tests --- benchmarks/CMakeLists.txt | 22 +++++++++++++++++----- benchmarks/config_1.cfg | 5 +++++ tests/CMakeLists.txt | 22 +++++++++++++++++----- tests/config_1.cfg | 5 +++++ 4 files changed, 44 insertions(+), 10 deletions(-) create mode 100644 benchmarks/config_1.cfg create mode 100644 tests/config_1.cfg diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt index 91c1477..e1d2c29 100644 --- a/benchmarks/CMakeLists.txt +++ b/benchmarks/CMakeLists.txt @@ -2,10 +2,6 @@ if(DEFINED ENV{HDF5_PLUGIN_PATH}) message("HDF5_PLUGIN_PATH is " $ENV{HDF5_PLUGIN_PATH}) endif() -if(DEFINED ENV{HDF5_VOL_CONNECTOR}) - message("HDF5_VOL_CONNECTOR is " $ENV{HDF5_VOL_CONNECTOR}) -endif() - find_package(MPI) include_directories(SYSTEM ${MPI_INCLUDE_PATH}) include_directories(${HDF5_INCLUDE_DIRS}) @@ -13,6 +9,22 @@ include_directories(${ASYNC_INCLUDE_DIRS}) set(benchmarks write_cache write prepare_dataset read_cache) +file(COPY config_1.cfg DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) + +# Set up the environment for the test run. +list( + APPEND + TEST_ENV + "HDF5_VOL_CONNECTOR=cache_ext config=config_1.cfg\\;under_vol=0\\;under_info={}" +) +message("HDF5_VOL_CONNECTOR is " ${TEST_ENV}) + +list( + APPEND + TEST_ENV + "HDF5_PLUGIN_PATH=$ENV{HDF5_PLUGIN_PATH}" +) + foreach(test ${benchmarks}) add_executable(${test}.exe ${CMAKE_CURRENT_SOURCE_DIR}/${test}.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../utils/debug.c ${CMAKE_CURRENT_SOURCE_DIR}/profiling.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../src/cache_utils.c) target_link_libraries(${test}.exe PRIVATE ${MPI_C_LIBRARIES} ${HDF5_LIBRARIES} cache_new_h5api) @@ -20,7 +32,7 @@ foreach(test ${benchmarks}) set_tests_properties( ${test} PROPERTIES - ENVIRONMENT "HDF5_VOL_CONNECTOR=\"cache_ext config=config_1.cfg;under_vol=0;under_info={};\";HDF5_PLUGIN_PATH=$ENV{HDF5_PLUGIN_PATH}") + ENVIRONMENT "${TEST_ENV}") endforeach () install( diff --git a/benchmarks/config_1.cfg b/benchmarks/config_1.cfg new file mode 100644 index 0000000..47e936b --- /dev/null +++ b/benchmarks/config_1.cfg @@ -0,0 +1,5 @@ +HDF5_CACHE_STORAGE_SCOPE: LOCAL # the scope of the storage [LOCAL|GLOBAL] +HDF5_CACHE_STORAGE_PATH: /tmp # path of local storage +HDF5_CACHE_STORAGE_SIZE: 21474836480 # size of the storage space in bytes +HDF5_CACHE_STORAGE_TYPE: SSD # local storage type [SSD|BURST_BUFFER|MEMORY|GPU], default SSD +HDF5_CACHE_REPLACEMENT_POLICY: LRU # [LRU|LFU|FIFO|LIFO] diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 3d46577..9eb08a6 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -2,10 +2,6 @@ if(DEFINED ENV{HDF5_PLUGIN_PATH}) message("HDF5_PLUGIN_PATH is " $ENV{HDF5_PLUGIN_PATH}) endif() -if(DEFINED ENV{HDF5_VOL_CONNECTOR}) - message("HDF5_VOL_CONNECTOR is " $ENV{HDF5_VOL_CONNECTOR}) -endif() - find_package(MPI) include_directories(SYSTEM ${MPI_INCLUDE_PATH}) include_directories(${HDF5_INCLUDE_DIRS}) @@ -13,6 +9,22 @@ include_directories(${ASYNC_INCLUDE_DIRS}) set(tests test_file test_group test_dataset test_dataset_async_api test_write_multi test_multdset) +file(COPY config_1.cfg DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) + +# Set up the environment for the test run. 
+list( + APPEND + TEST_ENV + "HDF5_VOL_CONNECTOR=cache_ext config=config_1.cfg\\;under_vol=0\\;under_info={}" +) +message("HDF5_VOL_CONNECTOR is " ${TEST_ENV}) + +list( + APPEND + TEST_ENV + "HDF5_PLUGIN_PATH=$ENV{HDF5_PLUGIN_PATH}" +) + foreach(test ${tests}) add_executable(${test}.exe ${CMAKE_CURRENT_SOURCE_DIR}/${test}.cpp) target_link_libraries(${test}.exe PRIVATE ${MPI_C_LIBRARIES} ${HDF5_LIBRARIES} cache_new_h5api) @@ -20,7 +32,7 @@ foreach(test ${tests}) set_tests_properties( ${test} PROPERTIES - ENVIRONMENT "HDF5_VOL_CONNECTOR=\"cache_ext config=config_1.cfg;under_vol=0;under_info={};\";HDF5_PLUGIN_PATH=$ENV{HDF5_PLUGIN_PATH}") + ENVIRONMENT "${TEST_ENV}") endforeach () install( diff --git a/tests/config_1.cfg b/tests/config_1.cfg new file mode 100644 index 0000000..47e936b --- /dev/null +++ b/tests/config_1.cfg @@ -0,0 +1,5 @@ +HDF5_CACHE_STORAGE_SCOPE: LOCAL # the scope of the storage [LOCAL|GLOBAL] +HDF5_CACHE_STORAGE_PATH: /tmp # path of local storage +HDF5_CACHE_STORAGE_SIZE: 21474836480 # size of the storage space in bytes +HDF5_CACHE_STORAGE_TYPE: SSD # local storage type [SSD|BURST_BUFFER|MEMORY|GPU], default SSD +HDF5_CACHE_REPLACEMENT_POLICY: LRU # [LRU|LFU|FIFO|LIFO] From f08cd7a04c2bb0eb7edd88ca2047a2b197eb3815 Mon Sep 17 00:00:00 2001 From: Scot Breitenfeld Date: Wed, 20 Nov 2024 23:36:09 -0600 Subject: [PATCH 2/2] update the README --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index a37bfc1..708a5a3 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ Documentation: -This is the public repo for Cache VOL, a software package developed in the ```ExaIO``` Exascale Computing Project. The main objective of Cache VOL is to incorporate fast storage layers (e.g, burst buffer, node-local storage) into parallel I/O workflow for caching and staging data to improve the I/O efficiency. +This is the public repo for Cache VOL, a software package developed in the ```ExaIO``` Exascale Computing Project. Cache VOL's main objective is to incorporate fast storage layers (e.g., burst buffer, node-local storage) into parallel I/O workflows for caching and staging data to improve I/O efficiency. The design, implementation, and performance evaluation of Cache VOL is presented in our CCGrid'2022 paper: Huihuo Zheng, Venkatram Vishwanath, Quincey Koziol, Houjun Tang, John Ravi, John Mainzer, Suren Byna, "HDF5 Cache VOL: Efficient and Scalable Parallel @@ -40,7 +40,7 @@ HDF5_VOL_DIR # prefix for install the VOL connectors ### Building HDF5 shared library -Currently, the cache VOL depends on the develop branch of HDF5, +Currently, the cache VOL depends on the versions equal to or greater than 1.14 or the *develop* branch of HDF5, ```bash git clone -b develop https://github.com/HDFGroup/hdf5.git @@ -144,9 +144,9 @@ This will generate a hdf5 file, images.h5, which contains 8192 samples. Each 224 * --shuffle: Whether to shuffle the samples at the beginning of each epoch. * --local_storage [Default: ./]: The path of the local storage. -For the read benchmark, it is important to isolate the DRAM caching effect. By default, during the first iteration, the system will cache all the data on the memory (RSS), unless the memory capacity is not big enough to cache all the data. This ends up with a very high bandwidth at second iteration, and it is independent of where the node-local storage are. +To accurately assess the read benchmark, isolating the effects of DRAM caching is crucial. 
By default, during the first iteration, the system caches all data in memory (RSS) unless the memory capacity is insufficient to store all the data. As a result, the second iteration achieves a very high bandwidth, regardless of the location of the node-local storage.

-To remove the cache / buffering effect for read benchmarks, one can allocate a big array that is close to the size of the RAM, so that it does not have any extra space to cache the input HDF5 file. This can be achieve by setting ```MEMORY_PER_PROC``` (memory per process in Giga Byte). **However, this might cause the compute node to crash.** The other way is to read dummpy files by setting ```CACHE_NUM_FILES``` (number of dummpy files to read per process).
+To remove the cache/buffering effect for read benchmarks, one can allocate a large array close to the RAM size so that no spare memory is left to cache the input HDF5 file. This can be achieved by setting ```MEMORY_PER_PROC``` (memory per process, in gigabytes). **However, this might cause the compute node to crash.** Alternatively, one can read dummy files by setting ```CACHE_NUM_FILES``` (number of dummy files to read per process).

## Citation
If you use Cache VOL, please cite the following paper
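For reference, a minimal sketch of running one of the built test executables by hand with the same environment the patched CMakeLists now sets through the `TEST_ENV` list; the install path, the `mpirun` launcher, and the process count below are assumptions, not part of the patch:

```bash
# Hypothetical manual run mirroring the ENVIRONMENT property CTest now sets per test.
# Adjust HDF5_PLUGIN_PATH to wherever the cache VOL connector library is installed;
# config_1.cfg is copied into the build directory by the file(COPY ...) step above.
export HDF5_PLUGIN_PATH=/path/to/vol-cache/install/lib
export HDF5_VOL_CONNECTOR="cache_ext config=config_1.cfg;under_vol=0;under_info={}"
mpirun -np 2 ./test_file.exe
```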