DAOS-16845 test: add dynamic pool wait_for_aggregation
Add TestPool.verify_space and use it to dynamically wait for pool
aggregation to complete.
Update tests to use the new function.

Test-tag: test_enospace_time_with_fg DfuseSpaceCheck
Skip-unit-tests: true
Skip-fault-injection-test: true

Signed-off-by: Dalton Bohning <[email protected]>
daltonbohning committed Jan 8, 2025
1 parent 6f0ffad commit 6d8f4b9
Showing 3 changed files with 73 additions and 27 deletions.
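At a glance, the change swaps fixed sleeps for polling. A minimal sketch of the intended call pattern, assembled from the diffs below (the surrounding avocado test class and its self.pool attribute are assumed context, not part of this commit):

    # Record free NVMe space before the workload runs.
    initial_free_nvme = self.pool.get_pool_daos_space()["s_free"][1]

    # ... write data, delete containers, etc. ...

    # Poll until aggregation restores the initial free space: up to
    # 8 checks, 30 seconds apart (the values used in enospace.py below).
    if not self.pool.verify_space(
            verify_free_nvme=lambda current: current == initial_free_nvme,
            retries=8, interval=30):
        self.fail("Aggregation did not reclaim the expected space")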
32 changes: 14 additions & 18 deletions src/tests/ftest/aggregation/dfuse_space_check.py
@@ -1,11 +1,11 @@
"""
(C) Copyright 2020-2024 Intel Corporation.
(C) Copyright 2020-2025 Intel Corporation.
(C) Copyright 2025 Hewlett Packard Enterprise Development LP
SPDX-License-Identifier: BSD-2-Clause-Patent
"""

import os
import time

from dfuse_utils import get_dfuse, start_dfuse
from ior_test_base import IorTestBase
@@ -21,8 +21,8 @@ class DfuseSpaceCheck(IorTestBase):
     def __init__(self, *args, **kwargs):
         """Initialize a DfuseSpaceCheck object."""
         super().__init__(*args, **kwargs)
-        self.initial_space = None
-        self.block_size = None
+        self.__initial_space = None
+        self.__block_size = None

     def get_nvme_free_space(self, display=True):
         """Display pool free space.
@@ -50,14 +50,10 @@ def wait_for_aggregation(self, retries=4, interval=60):
                 Default is 60.
         """
-        for _ in range(retries):
-            current_space = self.get_nvme_free_space()
-            if current_space == self.initial_space:
-                return
-            time.sleep(interval)
-
-        self.log.info("Free space when test terminated: %s", current_space)
-        self.fail("Aggregation did not complete within {} seconds".format(retries * interval))
+        if not self.pool.verify_space(
+                verify_free_nvme=lambda current: current == self.__initial_space,
+                retries=retries, interval=interval):
+            self.fail(f"Aggregation did not complete within {retries * interval} seconds")

     def write_multiple_files(self, dfuse):
         """Write multiple files.
@@ -70,9 +66,9 @@ def write_multiple_files(self, dfuse):
         """
         file_count = 0
-        while self.get_nvme_free_space(False) >= self.block_size:
+        while self.get_nvme_free_space(False) >= self.__block_size:
             file_path = os.path.join(dfuse.mount_dir.value, "file{}.txt".format(file_count))
-            write_dd_cmd = "dd if=/dev/zero of={} bs={} count=1".format(file_path, self.block_size)
+            write_dd_cmd = f"dd if=/dev/zero of={file_path} bs={self.__block_size} count=1"
             result = run_remote(
                 self.log, self.hostlist_clients, write_dd_cmd, verbose=False, timeout=300)
             if not result.passed:
@@ -109,7 +105,7 @@ def test_dfusespacecheck(self):
         :avocado: tags=DfuseSpaceCheck,test_dfusespacecheck
         """
         # get test params for cont and pool count
-        self.block_size = self.params.get('block_size', '/run/dfusespacecheck/*')
+        self.__block_size = self.params.get('block_size', '/run/dfusespacecheck/*')

         # Create a pool, container, and start dfuse
         self.create_pool()
@@ -118,15 +114,15 @@
         start_dfuse(self, dfuse, self.pool, self.container)

         # get nvme space before write
-        self.initial_space = self.get_nvme_free_space()
+        self.__initial_space = self.get_nvme_free_space()

         # Create a file as large as we can
         large_file = os.path.join(dfuse.mount_dir.value, 'largefile.txt')
         if not run_remote(self.log, self.hostlist_clients, f'touch {large_file}').passed:
             self.fail(f"Error creating {large_file}")
-        dd_count = (self.initial_space // self.block_size) + 1
+        dd_count = (self.__initial_space // self.__block_size) + 1
         write_dd_cmd = "dd if=/dev/zero of={} bs={} count={}".format(
-            large_file, self.block_size, dd_count)
+            large_file, self.__block_size, dd_count)
         run_remote(self.log, self.hostlist_clients, write_dd_cmd)

         # Remove the file
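A side note on the rename from self.initial_space to self.__initial_space above: double-underscore attributes trigger Python name mangling, so the values are stored as _DfuseSpaceCheck__initial_space and cannot be accidentally overwritten by a subclass or mixin. A standalone illustration (not DAOS-specific):

    class Base:
        def __init__(self):
            self.__size = 1  # stored as _Base__size via name mangling

    class Child(Base):
        def __init__(self):
            super().__init__()
            self.__size = 2  # stored as _Child__size; Base's value survives

    child = Child()
    print(child._Base__size, child._Child__size)  # prints: 1 2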
30 changes: 22 additions & 8 deletions src/tests/ftest/nvme/enospace.py
@@ -1,5 +1,5 @@
 '''
-(C) Copyright 2020-2024 Intel Corporation.
+(C) Copyright 2020-2025 Intel Corporation.

 SPDX-License-Identifier: BSD-2-Clause-Patent
 '''
@@ -464,22 +464,36 @@ def test_enospace_time_with_fg(self):
         """
         self.log.info(self.pool.pool_percentage_used())

-        # Enabled TIme mode for Aggregation.
+        self.log_step("Enable pool aggregation")
         self.pool.set_property("reclaim", "time")

+        self.log_step("Get initial pool free space")
+        initial_space = self.pool.get_pool_daos_space()
+        initial_free_scm = initial_space["s_free"][0]
+        initial_free_nvme = initial_space["s_free"][1]
+        self.log.info("initial_free_scm = %s", initial_free_scm)
+        self.log.info("initial_free_nvme = %s", initial_free_nvme)
+
         # Repeat the test in loop.
         for _loop in range(10):
-            self.log.info("-------enospc_time_fg Loop--------- %d", _loop)
+            self.log_step(f"Run IOR to fill the pool - enospace_time_with_fg loop {_loop}")
             self.log.info(self.pool.pool_percentage_used())
-            # Run IOR to fill the pool.
             log_file = f"-loop_{_loop}".join(os.path.splitext(self.client_log))
             self.run_enospace_with_bg_job(log_file)
-            # Delete all the containers
+            self.log_step(f"Delete all containers - enospace_time_with_fg loop {_loop}")
             self.delete_all_containers()
-            # Delete container will take some time to release the space
-            time.sleep(60)

-        # Run last IO
+            self.log_step(f"Wait for aggregation to complete - enospace_time_with_fg loop {_loop}")
+            agg_did_complete = self.pool.verify_space(
+                # verify_scm=lambda current: current <= initial_free_scm * 1.05,
+                # verify_nvme=lambda current: current <= initial_free_nvme * 1.05,
+                verify_free_scm=lambda current: current == initial_free_scm,
+                verify_free_nvme=lambda current: current == initial_free_nvme,
+                retries=8, interval=30)
+            if not agg_did_complete:
+                self.fail("Pool space not reclaimed after deleting all containers")
+
+        self.log_step("Run one more sanity IOR to fill 1%")
         self.start_ior_load(storage='SCM', operation="Auto_Write", percent=1)

     @skipForTicket("DAOS-8896")
38 changes: 37 additions & 1 deletion src/tests/ftest/util/test_utils_pool.py
@@ -1,5 +1,6 @@
"""
(C) Copyright 2018-2024 Intel Corporation.
(C) Copyright 2018-2025 Intel Corporation.
(C) Copyright 2025 Hewlett Packard Enterprise Development LP
SPDX-License-Identifier: BSD-2-Clause-Patent
"""
@@ -1475,6 +1476,41 @@ def wait_pool_dead_ranks(self, expected, interval=1, timeout=30):

         self.log.info("Wait for dead ranks complete: dead ranks %s", expected)

+    def verify_space(self, verify_free_scm=None, verify_free_nvme=None, retries=4, interval=30):
+        """Verify pool space with a time constraint.
+
+        Args:
+            verify_free_scm (callable, optional): function(current) to verify scm free space.
+                Defaults to None. Must supply at least one verify_* argument.
+            verify_free_nvme (callable, optional): function(current) to verify nvme free space.
+                Defaults to None. Must supply at least one verify_* argument.
+            retries (int, optional): number of times to retry. Default is 4.
+            interval (int, optional): seconds to wait before retrying. Default is 30.
+
+        Returns:
+            bool: whether space verification succeeded within the time limit
+
+        Raises:
+            ValueError: if no verify_* argument is given
+
+        """
+        if verify_free_scm is None and verify_free_nvme is None:
+            raise ValueError("verify_free_scm or verify_free_nvme is required")
+        for retry in range(retries):
+            if retry > 0:
+                sleep(interval)
+            current_space = self.get_pool_daos_space()
+            current_free_scm = current_space["s_free"][0]
+            current_free_nvme = current_space["s_free"][1]
+            self.log.info("current_free_scm = %s", current_free_scm)
+            self.log.info("current_free_nvme = %s", current_free_nvme)
+            if verify_free_scm and not verify_free_scm(current_free_scm):
+                continue
+            if verify_free_nvme and not verify_free_nvme(current_free_nvme):
+                continue
+            return True  # all succeeded
+        return False  # out of retries
+
     def verify_uuid_directory(self, host, scm_mount):
         """Check if pool folder exist on server.
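Because verify_space accepts arbitrary callables rather than target values, callers choose how strict the check is. The exact-equality predicates used in enospace.py are the strict form; the commented-out lines in that hunk hint at a looser variant. A hypothetical tolerance-based call (the 5% margin is illustrative, not part of this commit):

    # Accept any free-space reading within 5% of the initial value
    # instead of requiring an exact match.
    reclaimed = self.pool.verify_space(
        verify_free_scm=lambda current: current >= initial_free_scm * 0.95,
        verify_free_nvme=lambda current: current >= initial_free_nvme * 0.95,
        retries=8, interval=30)
    if not reclaimed:
        self.fail("Pool space not reclaimed within tolerance")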
