Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DAOS-16845 test: add dynamic pool wait_for_aggregation #15553

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 13 additions & 17 deletions src/tests/ftest/aggregation/dfuse_space_check.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
"""
(C) Copyright 2020-2024 Intel Corporation.
(C) Copyright 2025 Hewlett Packard Enterprise Development LP

SPDX-License-Identifier: BSD-2-Clause-Patent
"""

import os
import time

from dfuse_utils import get_dfuse, start_dfuse
from ior_test_base import IorTestBase
Expand All @@ -21,8 +21,8 @@ class DfuseSpaceCheck(IorTestBase):
def __init__(self, *args, **kwargs):
"""Initialize a DfuseSpaceCheck object."""
super().__init__(*args, **kwargs)
self.initial_space = None
self.block_size = None
self.__initial_space = None
self.__block_size = None

def get_nvme_free_space(self, display=True):
"""Display pool free space.
Expand Down Expand Up @@ -50,14 +50,10 @@ def wait_for_aggregation(self, retries=4, interval=60):
Default is 60.

"""
for _ in range(retries):
current_space = self.get_nvme_free_space()
if current_space == self.initial_space:
return
time.sleep(interval)

self.log.info("Free space when test terminated: %s", current_space)
self.fail("Aggregation did not complete within {} seconds".format(retries * interval))
if not self.pool.verify_space(
verify_free_nvme=lambda current: current == self.__initial_space,
retries=retries, interval=interval):
self.fail(f"Aggregation did not complete within {retries * interval} seconds")

def write_multiple_files(self, dfuse):
"""Write multiple files.
Expand All @@ -70,9 +66,9 @@ def write_multiple_files(self, dfuse):

"""
file_count = 0
while self.get_nvme_free_space(False) >= self.block_size:
while self.get_nvme_free_space(False) >= self.__block_size:
file_path = os.path.join(dfuse.mount_dir.value, "file{}.txt".format(file_count))
write_dd_cmd = "dd if=/dev/zero of={} bs={} count=1".format(file_path, self.block_size)
write_dd_cmd = f"dd if=/dev/zero of={file_path} bs={self.__block_size} count=1"
result = run_remote(
self.log, self.hostlist_clients, write_dd_cmd, verbose=False, timeout=300)
if not result.passed:
Expand Down Expand Up @@ -109,7 +105,7 @@ def test_dfusespacecheck(self):
:avocado: tags=DfuseSpaceCheck,test_dfusespacecheck
"""
# get test params for cont and pool count
self.block_size = self.params.get('block_size', '/run/dfusespacecheck/*')
self.__block_size = self.params.get('block_size', '/run/dfusespacecheck/*')

# Create a pool, container, and start dfuse
self.create_pool()
Expand All @@ -118,15 +114,15 @@ def test_dfusespacecheck(self):
start_dfuse(self, dfuse, self.pool, self.container)

# get nvme space before write
self.initial_space = self.get_nvme_free_space()
self.__initial_space = self.get_nvme_free_space()

# Create a file as large as we can
large_file = os.path.join(dfuse.mount_dir.value, 'largefile.txt')
if not run_remote(self.log, self.hostlist_clients, f'touch {large_file}').passed:
self.fail(f"Error creating {large_file}")
dd_count = (self.initial_space // self.block_size) + 1
dd_count = (self.__initial_space // self.__block_size) + 1
write_dd_cmd = "dd if=/dev/zero of={} bs={} count={}".format(
large_file, self.block_size, dd_count)
large_file, self.__block_size, dd_count)
run_remote(self.log, self.hostlist_clients, write_dd_cmd)

# Remove the file
Expand Down
29 changes: 22 additions & 7 deletions src/tests/ftest/nvme/enospace.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
'''
(C) Copyright 2020-2024 Intel Corporation.
(C) Copyright 2025 Hewlett Packard Enterprise Development LP

SPDX-License-Identifier: BSD-2-Clause-Patent
'''
Expand Down Expand Up @@ -464,22 +465,36 @@ def test_enospace_time_with_fg(self):
"""
self.log.info(self.pool.pool_percentage_used())

# Enabled TIme mode for Aggregation.
self.log_step("Enable pool aggregation")
self.pool.set_property("reclaim", "time")

self.log_step("Get initial pool free space")
initial_space = self.pool.get_pool_daos_space()
initial_free_scm = initial_space["s_free"][0]
initial_free_nvme = initial_space["s_free"][1]
self.log.info("initial_free_scm = %s", initial_free_scm)
self.log.info("initial_free_nvme = %s", initial_free_nvme)

# Repeat the test in loop.
for _loop in range(10):
self.log.info("-------enospc_time_fg Loop--------- %d", _loop)
self.log_step(f"Run IOR to fill the pool - enospace_time_with_fg loop {_loop}")
self.log.info(self.pool.pool_percentage_used())
# Run IOR to fill the pool.
log_file = f"-loop_{_loop}".join(os.path.splitext(self.client_log))
self.run_enospace_with_bg_job(log_file)
# Delete all the containers
self.log_step(f"Delete all containers - enospace_time_with_fg loop {_loop}")
self.delete_all_containers()
# Delete container will take some time to release the space
time.sleep(60)

# Run last IO
self.log_step(f"Wait for aggregation to complete - enospace_time_with_fg loop {_loop}")
agg_did_complete = self.pool.verify_space(
# verify_scm=lambda current: current <= initial_free_scm * 1.05,
# verify_nvme=lambda current: current <= initial_free_nvme * 1.05,
verify_free_scm=lambda current: current == initial_free_scm,
verify_free_nvme=lambda current: current == initial_free_nvme,
retries=8, interval=30)
if not agg_did_complete:
self.fail("Pool space not reclaimed after deleting all containers")

self.log_step("Run one more sanity IOR to fill 1%")
self.start_ior_load(storage='SCM', operation="Auto_Write", percent=1)

@skipForTicket("DAOS-8896")
Expand Down
36 changes: 36 additions & 0 deletions src/tests/ftest/util/test_utils_pool.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""
(C) Copyright 2018-2024 Intel Corporation.
(C) Copyright 2025 Hewlett Packard Enterprise Development LP

SPDX-License-Identifier: BSD-2-Clause-Patent
"""
Expand Down Expand Up @@ -1475,6 +1476,41 @@ def wait_pool_dead_ranks(self, expected, interval=1, timeout=30):

self.log.info("Wait for dead ranks complete: dead ranks %s", expected)

def verify_space(self, verify_free_scm=None, verify_free_nvme=None, retries=4, interval=30):
    """Verify pool space with a time constraint.

    Polls the pool free space up to `retries` times, sleeping `interval`
    seconds between attempts, until every supplied verify_* callable returns
    True for the current free space values from get_pool_daos_space().

    Args:
        verify_free_scm (callable, optional): function(current) to verify scm free space.
            Defaults to None. Must supply at least one verify_* argument.
        verify_free_nvme (callable, optional): function(current) to verify nvme free space.
            Defaults to None. Must supply at least one verify_* argument.
        retries (int, optional): number of times to retry. Default is 4.
        interval (int, optional): seconds to wait before retrying. Default is 30.

    Returns:
        bool: whether space verification succeeded within the time limit

    Raises:
        ValueError: if no verify_* argument is given

    """
    if verify_free_scm is None and verify_free_nvme is None:
        raise ValueError("verify_free_scm or verify_free_nvme is required")
    for retry in range(retries):
        # Sleep only between attempts so the first check is immediate.
        if retry > 0:
            sleep(interval)
        current_space = self.get_pool_daos_space()
        current_free_scm = current_space["s_free"][0]
        current_free_nvme = current_space["s_free"][1]
        self.log.info("current_free_scm = %s", current_free_scm)
        self.log.info("current_free_nvme = %s", current_free_nvme)
        # Retry if any supplied check fails for the current reading.
        if verify_free_scm and not verify_free_scm(current_free_scm):
            continue
        if verify_free_nvme and not verify_free_nvme(current_free_nvme):
            continue
        return True  # all supplied checks passed
    return False  # out of retries

def verify_uuid_directory(self, host, scm_mount):
"""Check if pool folder exist on server.

Expand Down
Loading