Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DAOS-15002 test: use default pool svc for rebuild tests #13648

Merged
merged 1 commit into from
Jan 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 3 additions & 5 deletions src/tests/ftest/container/rf_enforcement.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,7 @@ def test_container_redundancy_factor_oclass_enforcement(self):

:avocado: tags=all,full_regression
:avocado: tags=vm
:avocado: tags=container
:avocado: tags=container_rf,cont_rf_oclass_enforcement
:avocado: tags=test_container_redundancy_factor_oclass_enforcement
:avocado: tags=container,container_rf,cont_rf_oclass_enforcement
:avocado: tags=ContRfEnforce,test_container_redundancy_factor_oclass_enforcement
"""
self.mode = "cont_rf_enforcement"
self.execute_cont_rf_test()
self.execute_cont_rf_test(mode="cont_rf_enforcement")
3 changes: 0 additions & 3 deletions src/tests/ftest/deployment/server_rank_failure.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,12 +36,9 @@ server_config:

pool_size_ratio_80:
size: 80%
control_method: dmg
rebuild_timeout: 960
svcn: 5
pool_size_value:
size: 500G
control_method: dmg
rebuild_timeout: 240

container:
Expand Down
2 changes: 0 additions & 2 deletions src/tests/ftest/erasurecode/multiple_failure.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,6 @@ server_config:
storage: auto
pool:
size: 93%
svcn: 5
control_method: dmg
container:
type: POSIX
control_method: daos
Expand Down
2 changes: 0 additions & 2 deletions src/tests/ftest/erasurecode/offline_rebuild.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,6 @@ server_config:
storage: auto
pool:
size: 93%
svcn: 1
control_method: dmg
container:
type: POSIX
control_method: daos
Expand Down
2 changes: 0 additions & 2 deletions src/tests/ftest/erasurecode/offline_rebuild_single.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,6 @@ server_config:
storage: auto
pool:
size: 93%
svcn: 1
control_method: dmg
pool_query_timeout: 30
container:
type: POSIX
Expand Down
2 changes: 0 additions & 2 deletions src/tests/ftest/erasurecode/online_rebuild.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,6 @@ server_config:
storage: auto
pool:
size: 93%
svcn: 1
control_method: dmg
container:
type: POSIX
control_method: daos
Expand Down
2 changes: 0 additions & 2 deletions src/tests/ftest/erasurecode/online_rebuild_single.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,6 @@ server_config:
storage: auto
pool:
size: 93%
svcn: 5
control_method: dmg
pool_query_timeout: 30
properties: rd_fac:2
container:
Expand Down
2 changes: 0 additions & 2 deletions src/tests/ftest/erasurecode/rebuild_disabled.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,6 @@ server_config:
storage: auto
pool:
size: 93%
svcn: 1
control_method: dmg
pool_query_timeout: 30
container:
type: POSIX
Expand Down
2 changes: 0 additions & 2 deletions src/tests/ftest/erasurecode/rebuild_disabled_single.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,6 @@ server_config:
storage: auto
pool:
size: 93%
svcn: 1
control_method: dmg
pool_query_timeout: 30
container:
type: POSIX
Expand Down
4 changes: 1 addition & 3 deletions src/tests/ftest/rebuild/cascading_failures.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,7 @@ server_config:
scm_mount: /mnt/daos
system_ram_reserved: 1
pool:
scm_size: 1073741824
svcn: 2
control_method: dmg
size: 1G
pool_query_timeout: 30
properties: rd_fac:2
container:
Expand Down
3 changes: 1 addition & 2 deletions src/tests/ftest/rebuild/container_rf.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,5 +39,4 @@ def test_rebuild_with_container_rf(self):
:avocado: tags=container,rebuild,container_rf
:avocado: tags=RbldContRfTest,test_rebuild_with_container_rf
"""
self.mode = "cont_rf_with_rebuild"
self.execute_cont_rf_test()
self.execute_cont_rf_test(mode="cont_rf_with_rebuild")
7 changes: 3 additions & 4 deletions src/tests/ftest/rebuild/container_rf.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
hosts:
test_servers: 6
test_servers: 7
test_clients: 1
timeout: 480
server_config:
Expand All @@ -15,9 +15,8 @@ server_config:
scm_mount: /mnt/daos
system_ram_reserved: 1
pool:
scm_size: 1073741824
svcn: 3
control_method: dmg
size: 1G
svcn: 7 # To match number of servers
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The logic should be: "This test kills 3 engines." => "It needs at least 3 * 2 + 1 = 7 PS replicas to avoid losing the PS." => "It needs at least 7 engines for 7 PS replicas."

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right. This test has some other issues I need to fix as part of https://daosio.atlassian.net/browse/DAOS-15074.
I want to make this more flexible then.

Copy link
Contributor

@liw liw Jan 29, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@daltonbohning, sorry for the ambiguity. I mean the comment "to match number of servers" is not accurate; it should be something along the lines of "to allow killing 3 engines". That is, the number of PS replicas depends on the number of engines the test kills, and the number of engines depends on the number of PS replicas; not the other way around.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No worries, I understand! I also set the svcn to exactly the number of servers so all ranks will be svc ranks, making this test a little more deterministic until I fix it properly. E.g. there are some cases where I think the test needs to look at which ranks are svc ranks before killing.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good! I think "7 engines and 7 PS replicas" is good enough, for there's no external interface for knowing the current, exact set of PS replicas.

pool_query_timeout: 30
container:
control_method: daos
Expand Down
2 changes: 0 additions & 2 deletions src/tests/ftest/rebuild/delete_objects.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,7 @@ server_config:
system_ram_reserved: 1
pool:
scm_size: 1073741824
svcn: 2
debug: true
control_method: dmg
pool_query_timeout: 30
properties: rd_fac:2
container:
Expand Down
4 changes: 1 addition & 3 deletions src/tests/ftest/rebuild/read_array.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,7 @@ server_config:
scm_mount: /mnt/daos
system_ram_reserved: 1
pool:
scm_size: 1073741824
svcn: 2
control_method: dmg
size: 1G
pool_query_timeout: 30
properties: rd_fac:2
container:
Expand Down
5 changes: 2 additions & 3 deletions src/tests/ftest/rebuild/widely_striped.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,14 @@ testparams:
pool:
scm_size: 10G
nvme_size: 60G
svcn: 5
control_method: dmg
rebuild_timeout: 240
pool_query_timeout: 60
properties: rd_fac:2
container:
type: POSIX
control_method: daos
oclass: RP_3G1
properties: "rd_fac:2"
properties: rd_fac:2
mdtest:
api: DFS
client_processes:
Expand Down
3 changes: 1 addition & 2 deletions src/tests/ftest/rebuild/with_ior.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,7 @@ server_config:

pool:
scm_size: 6G
svcn: 3
control_method: dmg
properties: rd_fac:1
pool_query_timeout: 30

container:
Expand Down
33 changes: 9 additions & 24 deletions src/tests/ftest/util/container_rf_test_base.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
"""
(C) Copyright 2019-2023 Intel Corporation.
(C) Copyright 2019-2024 Intel Corporation.

SPDX-License-Identifier: BSD-2-Clause-Patent
"""
import re

from daos_utils import DaosCommand
from general_utils import DaosTestError
from rebuild_test_base import RebuildTestBase

Expand All @@ -15,21 +14,6 @@ class ContRedundancyFactor(RebuildTestBase):

:avocado: recursive
"""

def __init__(self, *args, **kwargs):
"""Initialize a CascadingFailures object."""
super().__init__(*args, **kwargs)
self.mode = None
self.daos_cmd = None

def create_test_container(self):
"""Create a container and write objects."""
self.log.info(
"==>(1)Create pool and container with redundant factor,"
" start background IO object write")
self.container.create()
self.container.write_objects(self.inputs.rank.value[0], self.inputs.object_class.value)

def verify_rank_has_objects(self):
"""Verify the first rank to be excluded has at least one object."""
rank_list = self.container.get_target_rank_lists(" before rebuild")
Expand Down Expand Up @@ -61,8 +45,7 @@ def verify_cont_rf_healthstatus(self, expected_rf, expected_health):
actual_rf = None
actual_health = None

cont_props = self.daos_cmd.container_get_prop(
pool=self.pool.uuid, cont=self.container.uuid, properties=["rd_fac", "status"])
cont_props = self.container.get_prop(properties=["rd_fac", "status"])
for cont_prop in cont_props["response"]:
if cont_prop["name"] == "rd_fac":
actual_rf = cont_prop["value"]
Expand Down Expand Up @@ -144,19 +127,20 @@ def create_test_container_and_write_obj(self, negative_test=False):
self.fail("#Negative test, container redundancy factor "
"test failed, return error RC: -1003 not found")

def execute_cont_rf_test(self, create_container=True):
def execute_cont_rf_test(self, create_container=True, mode=None):
"""Execute the rebuild test steps for container rd_fac test.

Args:
create_container (bool, optional): should the test create a
container. Defaults to True.
mode (str): either "cont_rf_with_rebuild" or "cont_rf_enforcement"
"""
# Get the test params and var
self.setup_test_pool()
self.daos_cmd = DaosCommand(self.bin)
if create_container:
self.setup_test_container()
oclass = self.inputs.object_class.value
# Negative testing pertains to RF enforcement when creating objects - not rebuild
negative_test = True
rd_fac = ''.join(self.container.properties.value.split(":"))
rf_match = re.search(r"rd_fac([0-9]+)", rd_fac)
Expand All @@ -171,7 +155,8 @@ def execute_cont_rf_test(self, create_container=True):
self.create_test_pool()
# Create a container and write objects
self.create_test_container_and_write_obj(negative_test)
if self.mode == "cont_rf_with_rebuild":

if mode == "cont_rf_with_rebuild":
num_of_ranks = len(self.inputs.rank.value)
if num_of_ranks > rf_num:
expect_cont_status = "UNCLEAN"
Expand All @@ -196,7 +181,7 @@ def execute_cont_rf_test(self, create_container=True):
self.log.info("==>(7)Check for container data if the container is healthy.")
self.verify_container_data()
self.log.info("Test passed")
elif self.mode == "cont_rf_enforcement":
elif mode == "cont_rf_enforcement":
self.log.info("Container rd_fac test passed")
else:
self.fail("#Unsupported container_rf test mode")
self.fail(f"Unsupported container_rf test mode: {mode}")
2 changes: 1 addition & 1 deletion src/tests/ftest/util/test_utils_pool.py
Original file line number Diff line number Diff line change
Expand Up @@ -354,7 +354,7 @@ def create(self):
self.pool = TestPool(self.context, DmgCommand(self.bin))

If it wants to use --nsvc option, it needs to set the value to
svcn.value. Otherwise, 1 is used. If it wants to use --group, it needs
svcn.value. If it wants to use --group, it needs
to set groupname.value. If it wants to use --user, it needs to set
username.value. If it wants to add other options, directly set it
to self.dmg.action_command. Refer to the dmg_utils.py pool_create method for
Expand Down
Loading