From 6cb9368b81a1ef95638624791f57dd0383117651 Mon Sep 17 00:00:00 2001
From: Dalton Bohning
Date: Tue, 3 Dec 2024 17:44:24 +0000
Subject: [PATCH] DAOS-16845 test: add dynamic pool wait_for_aggregation

Add TestPool.wait_for_aggregation to dynamically wait for pool
aggregation to complete.
Update tests to use the new function.

Test-tag: test_enospace_time_with_fg
Skip-unit-tests: true
Skip-fault-injection-test: true
Required-githooks: true

Signed-off-by: Dalton Bohning
---
Review notes (ignored by git am; not part of the commit message):
- wait_for_aggregation: verify_nvme is now called with the NVMe free
  space; it was previously called with the SCM free space.
- wait_for_aggregation: the docstring default for interval now matches
  the signature (30 seconds).
- test_enospace_time_with_fg: the verify callbacks now require free space
  to climb back to at least 95% of the initial value. The previous
  "current <= initial * 1.05" check was already true while the pool was
  still full, so the wait returned on the first poll.

 src/tests/ftest/nvme/enospace.py        | 24 ++++++++++++-----
 src/tests/ftest/util/test_utils_pool.py | 34 +++++++++++++++++++++++++
 2 files changed, 51 insertions(+), 7 deletions(-)

diff --git a/src/tests/ftest/nvme/enospace.py b/src/tests/ftest/nvme/enospace.py
index c7a996c110fd..9ffa2c77e696 100644
--- a/src/tests/ftest/nvme/enospace.py
+++ b/src/tests/ftest/nvme/enospace.py
@@ -464,22 +464,32 @@ def test_enospace_time_with_fg(self):
         """
         self.log.info(self.pool.pool_percentage_used())
 
-        # Enabled TIme mode for Aggregation.
+        self.log_step("Enable pool aggregation")
         self.pool.set_property("reclaim", "time")
 
+        self.log_step("Get initial pool free space")
+        initial_space = self.pool.get_pool_daos_space()
+        initial_free_scm = initial_space["s_free"][0]
+        initial_free_nvme = initial_space["s_free"][1]
+
         # Repeat the test in loop.
         for _loop in range(10):
-            self.log.info("-------enospc_time_fg Loop--------- %d", _loop)
+            self.log_step(f"Run IOR to fill the pool - enospace_time_with_fg loop {_loop}")
             self.log.info(self.pool.pool_percentage_used())
             # Run IOR to fill the pool.
             log_file = f"-loop_{_loop}".join(os.path.splitext(self.client_log))
             self.run_enospace_with_bg_job(log_file)
-            # Delete all the containers
+            self.log_step(f"Delete all containers - enospace_time_with_fg loop {_loop}")
             self.delete_all_containers()
-            # Delete container will take some time to release the space
-            time.sleep(60)
-
-        # Run last IO
+            self.log_step(f"Wait for aggregation to complete - enospace_time_with_fg loop {_loop}")
+            agg_did_complete = self.pool.wait_for_aggregation(
+                verify_scm=lambda current: current >= initial_free_scm * 0.95,
+                verify_nvme=lambda current: current >= initial_free_nvme * 0.95,
+                retries=4, interval=30)
+            if not agg_did_complete:
+                self.fail("Pool space not reclaimed after deleting all containers")
+
+        self.log_step("Run one more sanity IOR to fill 1%")
         self.start_ior_load(storage='SCM', operation="Auto_Write", percent=1)
 
     @skipForTicket("DAOS-8896")
diff --git a/src/tests/ftest/util/test_utils_pool.py b/src/tests/ftest/util/test_utils_pool.py
index f5e88d2c26cc..ee010993376e 100644
--- a/src/tests/ftest/util/test_utils_pool.py
+++ b/src/tests/ftest/util/test_utils_pool.py
@@ -1475,6 +1475,40 @@ def wait_pool_suspect_ranks(self, expected, interval=1, timeout=30):
 
         self.log.info("Wait for suspect ranks complete: suspect ranks %s", expected)
 
+    def wait_for_aggregation(self, verify_scm=None, verify_nvme=None,
+                             retries=4, interval=30):
+        """Wait for aggregation to finish.
+
+        Args:
+            verify_scm (callable, optional): function(current_scm) to verify scm free space.
+                Defaults to None. Must supply at least one of verify_scm or verify_nvme.
+            verify_nvme (callable, optional): function(current_nvme) to verify nvme free space.
+                Defaults to None. Must supply at least one of verify_scm or verify_nvme.
+            retries (int, optional): number of times to retry. Default is 4.
+            interval (int, optional): seconds to wait before retrying. Default is 30.
+
+        Returns:
+            bool: whether aggregation completed within the time limit
+
+        Raises:
+            ValueError: if neither verify_scm nor verify_nvme are given
+
+        """
+        if verify_scm is None and verify_nvme is None:
+            raise ValueError("verify_scm or verify_nvme is required")
+        for _ in range(retries):
+            current_space = self.get_pool_daos_space()
+            current_free_scm = current_space["s_free"][0]
+            current_free_nvme = current_space["s_free"][1]
+            self.log.info("current_free_scm = %s", current_free_scm)
+            self.log.info("current_free_nvme = %s", current_free_nvme)
+            if (verify_scm is None or verify_scm(current_free_scm)) and \
+                    (verify_nvme is None or verify_nvme(current_free_nvme)):
+                return True
+            sleep(interval)
+
+        return False
+
     def verify_uuid_directory(self, host, scm_mount):
         """Check if pool folder exist on server.
 