diff --git a/src/tests/ftest/osa/offline_drain.py b/src/tests/ftest/osa/offline_drain.py
index f24ea91016a..9a5cde4bfec 100644
--- a/src/tests/ftest/osa/offline_drain.py
+++ b/src/tests/ftest/osa/offline_drain.py
@@ -1,260 +1,283 @@
-"""
-  (C) Copyright 2020-2023 Intel Corporation.
-
-  SPDX-License-Identifier: BSD-2-Clause-Patent
-"""
-import random
-
-from nvme_utils import ServerFillUp
-from osa_utils import OSAUtils
-from test_utils_pool import add_pool
-from write_host_file import write_host_file
-
-
-class OSAOfflineDrain(OSAUtils, ServerFillUp):
-    # pylint: disable=too-many-ancestors
-    """
-    Test Class Description: This test runs
-    daos_server offline drain test cases.
-
-    :avocado: recursive
-    """
-
-    def setUp(self):
-        """Set up for test case."""
-        super().setUp()
-        self.dmg_command = self.get_dmg_command()
-        self.ranks = self.params.get("rank_list", '/run/test_ranks/*')
-        self.test_oclass = self.params.get("oclass", '/run/test_obj_class/*')
-        self.ior_test_sequence = self.params.get(
-            "ior_test_sequence", '/run/ior/iorflags/*')
-        # Recreate the client hostfile without slots defined
-        self.hostfile_clients = write_host_file(self.hostlist_clients, self.workdir)
-
-    def run_offline_drain_test(self, num_pool, data=False, oclass=None, pool_fillup=0):
-        """Run the offline drain without data.
-
-        Args:
-            num_pool (int) : total pools to create for testing purposes.
-            data (bool) : whether pool has no data or to create some data in pool.
-                Defaults to False.
-            oclass (str): DAOS object class (eg: RP_2G1,etc)
-        """
-        # Create a pool
-        pool = {}
-        target_list = []
-
-        if oclass is None:
-            oclass = self.ior_cmd.dfs_oclass.value
-
-        # Exclude target : random two targets (target idx : 0-7)
-        exc = random.randint(0, 6)  # nosec
-        target_list.append(exc)
-        target_list.append(exc + 1)
-        t_string = "{},{}".format(target_list[0], target_list[1])
-
-        for val in range(0, num_pool):
-            pool[val] = add_pool(self, connect=False)
-            self.pool = pool[val]
-            self.pool.set_property("reclaim", "disabled")
-            test_seq = self.ior_test_sequence[0]
-
-            if data:
-                # if pool_fillup is greater than 0, then
-                # use start_ior_load method from nvme_utils.py.
-                # Otherwise, use the osa_utils.py run_ior_thread
-                # method.
-                if pool_fillup > 0:
-                    self.ior_cmd.dfs_oclass.update(oclass)
-                    self.ior_cmd.dfs_dir_oclass.update(oclass)
-                    self.ior_default_flags = self.ior_w_flags
-                    self.log.info(self.pool.pool_percentage_used())
-                    self.start_ior_load(storage='NVMe', operation="Auto_Write", percent=pool_fillup)
-                    self.log.info(self.pool.pool_percentage_used())
-                else:
-                    self.run_ior_thread("Write", oclass, test_seq)
-                    self.run_mdtest_thread(oclass)
-                if self.test_with_snapshot is True:
-                    # Create a snapshot of the container
-                    # after IOR job completes.
-                    self.container.create_snap()
-                    self.log.info("Created container snapshot: %s", self.container.epoch)
-                if self.test_during_aggregation is True:
-                    self.run_ior_thread("Write", oclass, test_seq)
-
-        # Drain ranks and targets
-        for val in range(0, num_pool):
-            # Drain ranks provided in YAML file
-            for index, rank in enumerate(self.ranks):
-                self.pool = pool[val]
-                # If we are testing using multiple pools, reintegrate
-                # the rank back and then drain.
-                self.pool.display_pool_daos_space("Pool space: Beginning")
-                # Get initial total free space (scm+nvme)
-                initial_total_space = self.pool.get_total_space(refresh=True)
-                pver_begin = self.pool.get_version(True)
-                self.log.info("Pool Version at the beginning %s", pver_begin)
-                if self.test_during_aggregation is True and index == 0:
-                    self.pool.set_property("reclaim", "time")
-                    self.delete_extra_container(self.pool)
-                    self.simple_osa_reintegrate_loop(rank=rank, action="drain")
-                if (self.test_during_rebuild is True and val == 0):
-                    # Exclude rank 3
-                    self.pool.exclude([3])
-                    self.pool.wait_for_rebuild_to_start()
-                # If the pool is filled up just drain only a single rank.
-                if pool_fillup > 0 and index > 0:
-                    continue
-                output = self.pool.drain(rank, t_string)
-                self.print_and_assert_on_rebuild_failure(output)
-                total_space_after_drain = self.pool.get_total_space(refresh=True)
-
-                pver_drain = self.pool.get_version(True)
-                self.log.info("Pool Version after drain %d", pver_drain)
-                # Check pool version incremented after pool drain
-                self.assertGreater(pver_drain, (pver_begin + 1),
-                                   "Pool Version Error: After drain")
-                if self.test_during_aggregation is False:
-                    self.assertGreater(initial_total_space, total_space_after_drain,
-                                       "Expected total space after drain is more than initial")
-                if num_pool > 1:
-                    output = self.pool.reintegrate(rank, t_string)
-                    self.print_and_assert_on_rebuild_failure(output)
-                    total_space_after_reintegration = self.pool.get_total_space(refresh=True)
-                    self.assertGreater(
-                        total_space_after_reintegration, total_space_after_drain,
-                        "Expected total space after reintegration is less than drain")
-                if (self.test_during_rebuild is True and val == 0):
-                    # Reintegrate rank 3
-                    output = self.pool.reintegrate("3")
-                    self.print_and_assert_on_rebuild_failure(output)
-                    total_space_after_reintegration = self.pool.get_total_space(refresh=True)
-                    self.assertGreater(
-                        total_space_after_reintegration, total_space_after_drain,
-                        "Expected total space after reintegration is less than drain")
-
-        for val in range(0, num_pool):
-            display_string = "Pool{} space at the End".format(val)
-            pool[val].display_pool_daos_space(display_string)
-        if data:
-            if pool_fillup > 0:
-                self.start_ior_load(storage='NVMe', operation='Auto_Read', percent=pool_fillup)
-            else:
-                self.run_ior_thread("Read", oclass, test_seq)
-                self.run_mdtest_thread(oclass)
-                self.container = self.pool_cont_dict[self.pool][0]
-                self.container.daos.env['UCX_LOG_LEVEL'] = 'error'
-                self.container.check()
-
-    def test_osa_offline_drain(self):
-        """JIRA ID: DAOS-4750.
-
-        Test Description: Validate Offline Drain
-
-        :avocado: tags=all,pr,daily_regression
-        :avocado: tags=hw,medium
-        :avocado: tags=osa,osa_drain,offline_drain,checksum,ior
-        :avocado: tags=OSAOfflineDrain,test_osa_offline_drain
-        """
-        self.log.info("Offline Drain : Basic Drain")
-        self.run_offline_drain_test(1, True)
-
-    def test_osa_offline_drain_without_checksum(self):
-        """Test ID: DAOS-7159.
-
-        Test Description: Validate Offline Drain without enabling checksum in container properties.
-
-        :avocado: tags=all,full_regression
-        :avocado: tags=hw,medium
-        :avocado: tags=osa,osa_drain,offline_drain
-        :avocado: tags=OSAOfflineDrain,test_osa_offline_drain_without_checksum
-        """
-        self.test_with_checksum = self.params.get("test_with_checksum", "/run/checksum/*")
-        self.log.info("Offline Drain : Without Checksum")
-        self.run_offline_drain_test(1, data=True)
-
-    def test_osa_offline_drain_during_aggregation(self):
-        """Test ID: DAOS-7159.
-
-        Test Description: Validate Offline Drain during aggregation
-
-        :avocado: tags=all,daily_regression
-        :avocado: tags=hw,medium
-        :avocado: tags=osa,osa_drain,offline_drain,checksum
-        :avocado: tags=OSAOfflineDrain,test_osa_offline_drain_during_aggregation
-        """
-        self.test_during_aggregation = self.params.get(
-            "test_with_aggregation", "/run/aggregation/*")
-        self.log.info("Offline Drain : During Aggregation")
-        self.run_offline_drain_test(1, data=True)
-
-    def test_osa_offline_drain_oclass(self):
-        """Test ID: DAOS-7159.
-
-        Test Description: Validate Offline Drain with different object class
-
-        :avocado: tags=all,full_regression
-        :avocado: tags=hw,medium
-        :avocado: tags=osa,osa_drain,offline_drain
-        :avocado: tags=OSAOfflineDrain,test_osa_offline_drain_oclass
-        """
-        self.test_with_checksum = self.params.get("test_with_checksum", "/run/checksum/*")
-        self.log.info("Offline Drain : Oclass")
-        for oclass in self.test_oclass:
-            self.run_offline_drain_test(1, data=True, oclass=oclass)
-
-    def test_osa_offline_drain_multiple_pools(self):
-        """Test ID: DAOS-7159.
-
-        Test Description: Validate Offline Drain with multiple pools
-
-        :avocado: tags=all,full_regression
-        :avocado: tags=hw,medium
-        :avocado: tags=osa,osa_drain,offline_drain
-        :avocado: tags=OSAOfflineDrain,test_osa_offline_drain_multiple_pools
-        """
-        self.log.info("Offline Drain : Multiple Pools")
-        self.run_offline_drain_test(2, data=True)
-
-    def test_osa_offline_drain_during_rebuild(self):
-        """Test ID: DAOS-7159.
-
-        Test Description: Validate Offline Drain during rebuild
-
-        :avocado: tags=all,full_regression
-        :avocado: tags=hw,medium
-        :avocado: tags=osa,osa_drain,offline_drain,rebuild
-        :avocado: tags=OSAOfflineDrain,test_osa_offline_drain_during_rebuild
-        """
-        self.test_during_rebuild = self.params.get("test_with_rebuild", "/run/rebuild/*")
-        self.log.info("Offline Drain : During Rebuild")
-        self.run_offline_drain_test(1, data=True)
-
-    def test_osa_offline_drain_after_snapshot(self):
-        """Test ID: DAOS-8057.
-
-        Test Description: Validate Offline Drain after taking snapshot.
-
-        :avocado: tags=all,daily_regression
-        :avocado: tags=hw,medium
-        :avocado: tags=osa,osa_drain,offline_drain,checksum
-        :avocado: tags=OSAOfflineDrain,test_osa_offline_drain_after_snapshot
-        """
-        self.test_with_snapshot = self.params.get("test_with_snapshot", "/run/snapshot/*")
-        self.log.info("Offline Drain : After taking snapshot")
-        self.run_offline_drain_test(1, data=True)
-
-    def test_osa_offline_drain_with_less_pool_space(self):
-        """Test ID: DAOS-7160.
-
-        Test Description: Drain rank after with less pool space.
-
-        :avocado: tags=all,full_regression
-        :avocado: tags=hw,medium
-        :avocado: tags=osa,osa_drain,offline_drain,offline_drain_full
-        :avocado: tags=OSAOfflineDrain,test_osa_offline_drain_with_less_pool_space
-        """
-        self.log.info("Offline Drain : Test with less pool space")
-        oclass = self.params.get("pool_test_oclass", '/run/pool_capacity/*')
-        pool_fillup = self.params.get("pool_fillup", '/run/pool_capacity/*')
-        self.run_offline_drain_test(1, data=True, oclass=oclass, pool_fillup=pool_fillup)
+"""
+  (C) Copyright 2020-2023 Intel Corporation.
+  (C) Copyright 2025 Hewlett Packard Enterprise Development LP
+
+  SPDX-License-Identifier: BSD-2-Clause-Patent
+"""
+import random
+
+from nvme_utils import ServerFillUp
+from osa_utils import OSAUtils
+from test_utils_pool import add_pool
+from write_host_file import write_host_file
+
+
+class OSAOfflineDrain(OSAUtils, ServerFillUp):
+    # pylint: disable=too-many-ancestors
+    """
+    Test Class Description: This test runs
+    daos_server offline drain test cases.
+
+    :avocado: recursive
+    """
+
+    def setUp(self):
+        """Set up for test case."""
+        super().setUp()
+        self.dmg_command = self.get_dmg_command()
+        self.ranks = self.params.get("rank_list", '/run/test_ranks/*')
+        self.test_oclass = self.params.get("oclass", '/run/test_obj_class/*')
+        self.ior_test_sequence = self.params.get(
+            "ior_test_sequence", '/run/ior/iorflags/*')
+        # Recreate the client hostfile without slots defined
+        self.hostfile_clients = write_host_file(self.hostlist_clients, self.workdir)
+        self.multiple_ranks = None
+
+    def run_offline_drain_test(self, num_pool, data=False, oclass=None, pool_fillup=0):
+        """Run the offline drain test with or without data.
+
+        Args:
+            num_pool (int) : total pools to create for testing purposes.
+            data (bool) : whether pool has no data or to create some data in pool.
+                Defaults to False.
+            oclass (str): DAOS object class (eg: RP_2G1,etc)
+            pool_fillup (int): percentage of pool capacity to fill with data. Defaults to 0.
+        """
+        # Create a pool
+        pool = {}
+        target_list = []
+
+        if oclass is None:
+            oclass = self.ior_cmd.dfs_oclass.value
+
+        # For testing with multiple ranks as dmg parameters, use a list of ranks.
+        if self.test_with_multiple_ranks is True:
+            self.ranks = self.multiple_ranks
+
+        # Exclude target : random two targets (target idx : 0-7)
+        exc = random.randint(0, 6)  # nosec
+        target_list.append(exc)
+        target_list.append(exc + 1)
+        t_string = "{},{}".format(target_list[0], target_list[1])
+
+        for val in range(0, num_pool):
+            pool[val] = add_pool(self, connect=False)
+            self.pool = pool[val]
+            self.pool.set_property("reclaim", "disabled")
+            test_seq = self.ior_test_sequence[0]
+
+            if data:
+                # if pool_fillup is greater than 0, then
+                # use start_ior_load method from nvme_utils.py.
+                # Otherwise, use the osa_utils.py run_ior_thread
+                # method.
+                if pool_fillup > 0:
+                    self.ior_cmd.dfs_oclass.update(oclass)
+                    self.ior_cmd.dfs_dir_oclass.update(oclass)
+                    self.ior_default_flags = self.ior_w_flags
+                    self.log.info(self.pool.pool_percentage_used())
+                    self.start_ior_load(storage='NVMe', operation="Auto_Write", percent=pool_fillup)
+                    self.log.info(self.pool.pool_percentage_used())
+                else:
+                    self.run_ior_thread("Write", oclass, test_seq)
+                    self.run_mdtest_thread(oclass)
+                if self.test_with_snapshot is True:
+                    # Create a snapshot of the container
+                    # after IOR job completes.
+                    self.container.create_snap()
+                    self.log.info("Created container snapshot: %s", self.container.epoch)
+                if self.test_during_aggregation is True:
+                    self.run_ior_thread("Write", oclass, test_seq)
+
+        # Drain ranks and targets
+        for val in range(0, num_pool):
+            # Drain ranks provided in YAML file
+            for index, rank in enumerate(self.ranks):
+                self.pool = pool[val]
+                # If we are testing using multiple pools, reintegrate
+                # the rank back and then drain.
+                self.pool.display_pool_daos_space("Pool space: Beginning")
+                # Get initial total free space (scm+nvme)
+                initial_total_space = self.pool.get_total_space(refresh=True)
+                pver_begin = self.pool.get_version(True)
+                self.log.info("Pool Version at the beginning %s", pver_begin)
+                if self.test_during_aggregation is True and index == 0:
+                    self.pool.set_property("reclaim", "time")
+                    self.delete_extra_container(self.pool)
+                    self.simple_osa_reintegrate_loop(rank=rank, action="drain")
+                if (self.test_during_rebuild is True and val == 0):
+                    # Exclude rank 3
+                    self.pool.exclude([3])
+                    self.pool.wait_for_rebuild_to_start()
+                # If the pool is filled up just drain only a single rank.
+                if pool_fillup > 0 and index > 0:
+                    continue
+                output = self.pool.drain(rank, t_string)
+                self.print_and_assert_on_rebuild_failure(output)
+                total_space_after_drain = self.pool.get_total_space(refresh=True)
+
+                pver_drain = self.pool.get_version(True)
+                self.log.info("Pool Version after drain %d", pver_drain)
+                # Check pool version incremented after pool drain
+                self.assertGreater(pver_drain, (pver_begin + 1),
+                                   "Pool Version Error: After drain")
+                if self.test_during_aggregation is False:
+                    self.assertGreater(initial_total_space, total_space_after_drain,
+                                       "Expected total space after drain is more than initial")
+                if num_pool > 1:
+                    output = self.pool.reintegrate(rank, t_string)
+                    self.print_and_assert_on_rebuild_failure(output)
+                    total_space_after_reintegration = self.pool.get_total_space(refresh=True)
+                    self.assertGreater(
+                        total_space_after_reintegration, total_space_after_drain,
+                        "Expected total space after reintegration is less than drain")
+                if (self.test_during_rebuild is True and val == 0):
+                    # Reintegrate rank 3
+                    output = self.pool.reintegrate("3")
+                    self.print_and_assert_on_rebuild_failure(output)
+                    total_space_after_reintegration = self.pool.get_total_space(refresh=True)
+                    self.assertGreater(
+                        total_space_after_reintegration, total_space_after_drain,
+                        "Expected total space after reintegration is less than drain")
+
+        for val in range(0, num_pool):
+            display_string = "Pool{} space at the End".format(val)
+            pool[val].display_pool_daos_space(display_string)
+        if data:
+            if pool_fillup > 0:
+                self.start_ior_load(storage='NVMe', operation='Auto_Read', percent=pool_fillup)
+            else:
+                self.run_ior_thread("Read", oclass, test_seq)
+                self.run_mdtest_thread(oclass)
+                self.container = self.pool_cont_dict[self.pool][0]
+                self.container.daos.env['UCX_LOG_LEVEL'] = 'error'
+                self.container.check()
+
+    def test_osa_offline_drain(self):
+        """JIRA ID: DAOS-4750.
+
+        Test Description: Validate Offline Drain
+
+        :avocado: tags=all,pr,daily_regression
+        :avocado: tags=hw,medium
+        :avocado: tags=osa,osa_drain,offline_drain,checksum,ior
+        :avocado: tags=OSAOfflineDrain,test_osa_offline_drain
+        """
+        self.log.info("Offline Drain : Basic Drain")
+        self.run_offline_drain_test(1, True)
+
+    def test_osa_offline_drain_without_checksum(self):
+        """Test ID: DAOS-7159.
+
+        Test Description: Validate Offline Drain without enabling checksum in container properties.
+
+        :avocado: tags=all,full_regression
+        :avocado: tags=hw,medium
+        :avocado: tags=osa,osa_drain,offline_drain
+        :avocado: tags=OSAOfflineDrain,test_osa_offline_drain_without_checksum
+        """
+        self.test_with_checksum = self.params.get("test_with_checksum", "/run/checksum/*")
+        self.log.info("Offline Drain : Without Checksum")
+        self.run_offline_drain_test(1, data=True)
+
+    def test_osa_offline_drain_during_aggregation(self):
+        """Test ID: DAOS-7159.
+
+        Test Description: Validate Offline Drain during aggregation
+
+        :avocado: tags=all,daily_regression
+        :avocado: tags=hw,medium
+        :avocado: tags=osa,osa_drain,offline_drain,checksum
+        :avocado: tags=OSAOfflineDrain,test_osa_offline_drain_during_aggregation
+        """
+        self.test_during_aggregation = self.params.get(
+            "test_with_aggregation", "/run/aggregation/*")
+        self.log.info("Offline Drain : During Aggregation")
+        self.run_offline_drain_test(1, data=True)
+
+    def test_osa_offline_drain_oclass(self):
+        """Test ID: DAOS-7159.
+
+        Test Description: Validate Offline Drain with different object class
+
+        :avocado: tags=all,full_regression
+        :avocado: tags=hw,medium
+        :avocado: tags=osa,osa_drain,offline_drain
+        :avocado: tags=OSAOfflineDrain,test_osa_offline_drain_oclass
+        """
+        self.test_with_checksum = self.params.get("test_with_checksum", "/run/checksum/*")
+        self.log.info("Offline Drain : Oclass")
+        for oclass in self.test_oclass:
+            self.run_offline_drain_test(1, data=True, oclass=oclass)
+
+    def test_osa_offline_drain_multiple_pools(self):
+        """Test ID: DAOS-7159.
+
+        Test Description: Validate Offline Drain with multiple pools
+
+        :avocado: tags=all,full_regression
+        :avocado: tags=hw,medium
+        :avocado: tags=osa,osa_drain,offline_drain
+        :avocado: tags=OSAOfflineDrain,test_osa_offline_drain_multiple_pools
+        """
+        self.log.info("Offline Drain : Multiple Pools")
+        self.run_offline_drain_test(2, data=True)
+
+    def test_osa_offline_drain_during_rebuild(self):
+        """Test ID: DAOS-7159.
+
+        Test Description: Validate Offline Drain during rebuild
+
+        :avocado: tags=all,full_regression
+        :avocado: tags=hw,medium
+        :avocado: tags=osa,osa_drain,offline_drain,rebuild
+        :avocado: tags=OSAOfflineDrain,test_osa_offline_drain_during_rebuild
+        """
+        self.test_during_rebuild = self.params.get("test_with_rebuild", "/run/rebuild/*")
+        self.log.info("Offline Drain : During Rebuild")
+        self.run_offline_drain_test(1, data=True)
+
+    def test_osa_offline_drain_after_snapshot(self):
+        """Test ID: DAOS-8057.
+
+        Test Description: Validate Offline Drain after taking snapshot.
+
+        :avocado: tags=all,daily_regression
+        :avocado: tags=hw,medium
+        :avocado: tags=osa,osa_drain,offline_drain,checksum
+        :avocado: tags=OSAOfflineDrain,test_osa_offline_drain_after_snapshot
+        """
+        self.test_with_snapshot = self.params.get("test_with_snapshot", "/run/snapshot/*")
+        self.log.info("Offline Drain : After taking snapshot")
+        self.run_offline_drain_test(1, data=True)
+
+    def test_osa_offline_drain_with_less_pool_space(self):
+        """Test ID: DAOS-7160.
+
+        Test Description: Drain a rank when the pool has less free space.
+
+        :avocado: tags=all,full_regression
+        :avocado: tags=hw,medium
+        :avocado: tags=osa,osa_drain,offline_drain,offline_drain_full
+        :avocado: tags=OSAOfflineDrain,test_osa_offline_drain_with_less_pool_space
+        """
+        self.log.info("Offline Drain : Test with less pool space")
+        oclass = self.params.get("pool_test_oclass", '/run/pool_capacity/*')
+        pool_fillup = self.params.get("pool_fillup", '/run/pool_capacity/*')
+        self.run_offline_drain_test(1, data=True, oclass=oclass, pool_fillup=pool_fillup)
+
+    def test_osa_offline_drain_with_multiple_ranks(self):
+        """Test ID: DAOS-4753.
+
+        Test Description: Drain multiple ranks at the same time.
+
+        :avocado: tags=all,full_regression
+        :avocado: tags=hw,medium
+        :avocado: tags=osa,osa_drain,offline_drain,offline_drain_full
+        :avocado: tags=OSAOfflineDrain,test_osa_offline_drain_with_multiple_ranks
+        """
+        self.log.info("Offline Drain : Test with multiple ranks")
+        self.test_with_multiple_ranks = self.params.get("test_with_multiple_ranks",
+                                                        '/run/multiple_ranks/*')
+        self.multiple_ranks = self.params.get("rank_list", '/run/multiple_ranks/*')
+        self.run_offline_drain_test(1, data=True)
diff --git a/src/tests/ftest/osa/offline_drain.yaml b/src/tests/ftest/osa/offline_drain.yaml
index a8776edd4a4..76182b8e1ff 100644
--- a/src/tests/ftest/osa/offline_drain.yaml
+++ b/src/tests/ftest/osa/offline_drain.yaml
@@ -1,107 +1,110 @@
-hosts:
-  test_servers: 3
-  test_clients: 1
-timeout: 2400
-setup:
-  start_servers_once: false
-skip_add_log_msg: true
-server_config:
-  name: daos_server
-  engines_per_host: 2
-  engines:
-    0:
-      pinned_numa_node: 0
-      nr_xs_helpers: 1
-      fabric_iface: ib0
-      fabric_iface_port: 31416
-      log_file: daos_server0.log
-      log_mask: INFO,MEM=ERR
-      env_vars:
-        - DD_MASK=mgmt,md
-      storage: auto
-    1:
-      pinned_numa_node: 1
-      nr_xs_helpers: 1
-      fabric_iface: ib1
-      fabric_iface_port: 31516
-      log_file: daos_server1.log
-      log_mask: INFO,MEM=ERR
-      env_vars:
-        - DD_MASK=mgmt,md
-      storage: auto
-pool:
-  scm_size: 12000000000
-  nvme_size: 108000000000
-  svcn: 4
-  rebuild_timeout: 240
-  properties: scrub:timed
-container:
-  type: POSIX
-  control_method: daos
-  oclass: RP_3G6
-  properties: cksum:crc64,cksum_size:16384,srv_cksum:on,rd_fac:2
-dkeys:
-  single:
-    no_of_dkeys:
-      - 50
-akeys:
-  single:
-    no_of_akeys:
-      - 10
-record:
-  1KB:
-    length:
-      - 1024
-ior:
-  clientslots:
-    slots: 48
-  test_file: /testFile
-  repetitions: 1
-  dfs_destroy: false
-  iorflags:
-    write_flags: "-w -F -k -G 1"
-    read_flags: "-F -r -R -k -G 1"
-    api: DFS
-    dfs_oclass: RP_3G6
-    dfs_dir_oclass: RP_3G6
-    ior_test_sequence:
-      # - [scmsize, nvmesize, transfersize, blocksize]
-      # The values are set to be in the multiples of 10.
-      # Values are appx GB.
-      - [12000000000, 108000000000, 500000, 500000000]
-mdtest:
-  api: DFS
-  client_processes:
-    np: 30
-  num_of_files_dirs: 4067  # creating total of 120K files
-  test_dir: "/"
-  iteration: 1
-  dfs_destroy: false
-  dfs_oclass: RP_3G6
-  dfs_dir_oclass: RP_3G6
-  manager: "MPICH"
-  flags: "-u"
-  wr_size:
-    32K:
-      write_bytes: 32768
-      read_bytes: 32768
-  verbosity_value: 1
-  depth: 0
-test_obj_class:
-  oclass:
-    - RP_2G8
-    - RP_4G1
-    - EC_2P1G1
-aggregation:
-  test_with_aggregation: true
-rebuild:
-  test_with_rebuild: true
-checksum:
-  test_with_checksum: false
-snapshot:
-  test_with_snapshot: true
-test_ranks:
-  rank_list: ["2", "5"]
-pool_capacity:
-  pool_fillup: 10
-  pool_test_oclass: RP_2GX
+hosts:
+  test_servers: 3
+  test_clients: 1
+timeout: 2400
+setup:
+  start_servers_once: false
+skip_add_log_msg: true
+server_config:
+  name: daos_server
+  engines_per_host: 2
+  engines:
+    0:
+      pinned_numa_node: 0
+      nr_xs_helpers: 1
+      fabric_iface: ib0
+      fabric_iface_port: 31416
+      log_file: daos_server0.log
+      log_mask: INFO,MEM=ERR
+      env_vars:
+        - DD_MASK=mgmt,md
+      storage: auto
+    1:
+      pinned_numa_node: 1
+      nr_xs_helpers: 1
+      fabric_iface: ib1
+      fabric_iface_port: 31516
+      log_file: daos_server1.log
+      log_mask: INFO,MEM=ERR
+      env_vars:
+        - DD_MASK=mgmt,md
+      storage: auto
+pool:
+  scm_size: 12000000000
+  nvme_size: 108000000000
+  svcn: 4
+  rebuild_timeout: 240
+  properties: scrub:timed
+container:
+  type: POSIX
+  control_method: daos
+  oclass: RP_3G6
+  properties: cksum:crc64,cksum_size:16384,srv_cksum:on,rd_fac:2
+dkeys:
+  single:
+    no_of_dkeys:
+      - 50
+akeys:
+  single:
+    no_of_akeys:
+      - 10
+record:
+  1KB:
+    length:
+      - 1024
+ior:
+  clientslots:
+    slots: 48
+  test_file: /testFile
+  repetitions: 1
+  dfs_destroy: false
+  iorflags:
+    write_flags: "-w -F -k -G 1"
+    read_flags: "-F -r -R -k -G 1"
+    api: DFS
+    dfs_oclass: RP_3G6
+    dfs_dir_oclass: RP_3G6
+    ior_test_sequence:
+      # - [scmsize, nvmesize, transfersize, blocksize]
+      # The values are set to be in the multiples of 10.
+      # Values are appx GB.
+      - [12000000000, 108000000000, 500000, 500000000]
+mdtest:
+  api: DFS
+  client_processes:
+    np: 30
+  num_of_files_dirs: 4067  # creating total of 120K files
+  test_dir: "/"
+  iteration: 1
+  dfs_destroy: false
+  dfs_oclass: RP_3G6
+  dfs_dir_oclass: RP_3G6
+  manager: "MPICH"
+  flags: "-u"
+  wr_size:
+    32K:
+      write_bytes: 32768
+      read_bytes: 32768
+  verbosity_value: 1
+  depth: 0
+test_obj_class:
+  oclass:
+    - RP_2G8
+    - RP_4G1
+    - EC_2P1G1
+aggregation:
+  test_with_aggregation: true
+rebuild:
+  test_with_rebuild: true
+checksum:
+  test_with_checksum: false
+snapshot:
+  test_with_snapshot: true
+multiple_ranks:
+  test_with_multiple_ranks: true
+  rank_list: ["1,2"]
+test_ranks:
+  rank_list: ["2", "5"]
+pool_capacity:
+  pool_fillup: 10
+  pool_test_oclass: RP_2GX
diff --git a/src/tests/ftest/osa/online_drain.py b/src/tests/ftest/osa/online_drain.py
index 3ee4f436256..07090d3d788 100644
--- a/src/tests/ftest/osa/online_drain.py
+++ b/src/tests/ftest/osa/online_drain.py
@@ -1,5 +1,6 @@
 """
   (C) Copyright 2020-2024 Intel Corporation.
+  (C) Copyright 2025 Hewlett Packard Enterprise Development LP
 
   SPDX-License-Identifier: BSD-2-Clause-Patent
 """
@@ -186,3 +187,19 @@ def test_osa_online_drain_mdtest(self):
         """
         self.log.info("Online Drain : With Mdtest")
         self.run_online_drain_test(1, app_name="mdtest")
+
+    def test_osa_online_drain_with_multiple_ranks(self):
+        """Test ID: DAOS-4753.
+
+        Test Description: Drain multiple ranks at the same time.
+
+        :avocado: tags=all,daily_regression
+        :avocado: tags=hw,medium
+        :avocado: tags=osa,osa_drain,online_drain
+        :avocado: tags=OSAOnlineDrain,test_osa_online_drain_with_multiple_ranks
+        """
+        self.log.info("Online Drain : Test with multiple ranks")
+        self.test_with_multiple_ranks = self.params.get("test_with_multiple_ranks",
+                                                        '/run/multiple_ranks/*')
+        self.multiple_ranks = self.params.get("rank_list", '/run/multiple_ranks/*')
+        self.run_online_drain_test(1, data=True)
diff --git a/src/tests/ftest/osa/online_drain.yaml b/src/tests/ftest/osa/online_drain.yaml
index 738683694b1..ed2a9f2ee49 100644
--- a/src/tests/ftest/osa/online_drain.yaml
+++ b/src/tests/ftest/osa/online_drain.yaml
@@ -90,3 +90,6 @@ rebuild:
   test_with_rebuild: true
 checksum:
   test_with_checksum: false
+multiple_ranks:
+  test_with_multiple_ranks: true
+  rank_list: ["1,2"]
diff --git a/src/tests/ftest/util/osa_utils.py b/src/tests/ftest/util/osa_utils.py
index 410b6ce46a2..54d526509fa 100644
--- a/src/tests/ftest/util/osa_utils.py
+++ b/src/tests/ftest/util/osa_utils.py
@@ -1,444 +1,446 @@
-"""
-  (C) Copyright 2020-2024 Intel Corporation.
-
-  SPDX-License-Identifier: BSD-2-Clause-Patent
-"""
-import queue
-import re
-import threading
-import time
-
-from avocado import fail_on
-from exception_utils import CommandFailure
-from general_utils import run_command
-from ior_test_base import IorTestBase
-from mdtest_test_base import MdtestBase
-
-
-class OSAUtils(MdtestBase, IorTestBase):
-    """Test Class Description: This test runs daos_server offline drain test cases.
-
-    :avocado: recursive
-    """
-
-    def setUp(self):
-        """Set up for test case."""
-        super().setUp()
-        self.pool_cont_dict = {}
-        self.container = None
-        self.obj = None
-        self.ioreq = None
-        self.dmg_command = self.get_dmg_command()
-        self.no_of_dkeys = self.params.get("no_of_dkeys", '/run/dkeys/*',
-                                           default=[0])[0]
-        self.no_of_akeys = self.params.get("no_of_akeys", '/run/akeys/*',
-                                           default=[0])[0]
-        self.record_length = self.params.get("length", '/run/record/*',
-                                             default=[0])[0]
-        self.ior_w_flags = self.params.get("write_flags", '/run/ior/iorflags/*',
-                                           default="")
-        self.ior_r_flags = self.params.get("read_flags", '/run/ior/iorflags/*')
-        self.server_count = len(self.hostlist_servers)
-        self.engine_count = self.server_managers[0].get_config_value(
-            "engines_per_host")
-        self.out_queue = queue.Queue()
-        self.dmg_command.exit_status_exception = False
-        self.test_during_aggregation = False
-        self.test_during_rebuild = False
-        self.test_with_checksum = True
-        # By default, test_with_rf is set to False.
-        # It is up to individual test to enable it.
-        self.test_with_rf = False
-        self.test_with_blank_node = False
-        self.test_with_snapshot = False
-
-    @fail_on(CommandFailure)
-    def assert_on_rebuild_failure(self):
-        """If the rebuild is not successful, raise assert."""
-        rebuild_status = self.pool.get_rebuild_status(True)
-        self.log.info("Rebuild Status: %s", rebuild_status)
-        if rebuild_status in ["failed", "scanning", "aborted", "busy"]:
-            self.fail("Rebuild failed")
-
-    @fail_on(CommandFailure)
-    def print_and_assert_on_rebuild_failure(self, out, timeout=3):
-        """Print the out value (daos, dmg, etc) and check for rebuild completion.
-
-        If rebuild does not complete, raise an assertion.
- """ - self.log.info(out) - self.pool.wait_for_rebuild_to_start() - self.pool.wait_for_rebuild_to_end(timeout) - self.assert_on_rebuild_failure() - - @fail_on(CommandFailure) - def get_ipaddr_for_rank(self, rank=None): - """Obtain the IPAddress and port number for a particular server rank. - - Args: - rank (int): daos_engine rank. Defaults to None. - - Returns: - ip_addr (str) : IPAddress for the rank. - port_num (str) : Port number for the rank. - """ - output = self.dmg_command.system_query() - members_length = self.server_count * self.engine_count - for index in range(0, members_length): - if rank == int(output["response"]["members"][index]["rank"]): - temp = output["response"]["members"][index]["addr"] - ip_addr = temp.split(":") - temp = output["response"]["members"][index]["fabric_uri"] - port_num = temp.split(":") - return ip_addr[0], port_num[2] - return None, None - - @fail_on(CommandFailure) - def remove_pool_dir(self, ip_addr=None, port_num=None): - """Remove the /mnt/daos[x]//vos-* directory. - - Args: - ip_addr (str): IP address of the daos server. Defaults to None. - port_number (str) : Port number the daos server. - """ - # Create the expected port list - # expected_ports = [port0] - Single engine/server - # expected_ports = [port0, port1] - Two engine/server - expected_ports = [engine_param.get_value("fabric_iface_port") - for engine_param in self.server_managers[-1]. - manager.job.yaml.engine_params] - self.log.info("Expected ports : %s", expected_ports) - if ip_addr is None or port_num is None: - self.log.info("ip_addr : %s port_number: %s", ip_addr, port_num) - self.fail("No IP Address or Port number provided") - else: - if self.engine_count == 1: - self.log.info("Single Engine per Server") - cmd = "/usr/bin/ssh {} -oStrictHostKeyChecking=no \ - sudo rm -rf /mnt/daos/{}/vos-*". \ - format(ip_addr, self.pool.uuid) - elif self.engine_count == 2: - if port_num == str(expected_ports[0]): - port_val = 0 - elif port_num == str(expected_ports[1]): - port_val = 1 - else: - port_val = None # To appease pylint - self.log.info("port_number: %s", port_num) - self.fail("Invalid port number") - cmd = "/usr/bin/ssh {} -oStrictHostKeyChecking=no \ - sudo rm -rf /mnt/daos{}/{}/vos-*". \ - format(ip_addr, port_val, self.pool.uuid) - else: - cmd = None # To appease pylint - self.fail("Not supported engine per server configuration") - run_command(cmd) - - def set_container(self, container): - """Set the OSA utils container object. - - Args: - container (TestContainer): Container object to be used within OSA utils. - """ - self.container = container - - def simple_osa_reintegrate_loop(self, rank, action="exclude", loop_time=100): - """Exclude or drain and reintegrate a rank for a certain amount of time. - - Args: - rank (int): daos server rank. - action (str, optional): "exclude" or "drain". Defaults to "exclude" - loop_time (int, optional): Total time to perform drain/reintegrate operation in a loop. - Defaults to 100. - """ - start_time = 0 - finish_time = 0 - start_time = time.time() - while int(finish_time - start_time) < loop_time: - if action == "exclude": - output = self.pool.exclude(rank) - else: - output = self.pool.drain(rank) - self.print_and_assert_on_rebuild_failure(output) - output = self.pool.reintegrate(rank) - self.print_and_assert_on_rebuild_failure(output) - finish_time = time.time() - - def prepare_cont_ior_write_read(self, oclass, flags): - """Prepare the containers for IOR write and read invocations. 
- - To enable aggregation: - - Create two containers and read always from first container - Normal usage (use only a single container): - - Create a single container and use the same. - - Args: - oclass (str): IOR object class - flags (str): IOR flags - """ - self.log.info(self.pool_cont_dict) - # If pool is not in the dictionary, - # initialize its container list to None - # {pool : [None, None], [None, None]} - if self.pool not in self.pool_cont_dict: - self.pool_cont_dict[self.pool] = [None] * 4 - # Create container if the pool doesn't have one. - # Otherwise, use the existing container in the pool. - # pool_cont_dict {pool A: [containerA, Updated, - # containerB, Updated], - # pool B : containerA, Updated, - # containerB, None]} - if self.pool_cont_dict[self.pool][0] is None: - self.add_container(self.pool, create=False) - self.set_cont_class_properties(oclass) - if self.test_with_checksum is False: - tmp = self.get_object_replica_value(oclass) - rf_value = "rd_fac:{}".format(tmp - 1) - self.update_cont_properties(rf_value) - self.container.create() - self.pool_cont_dict[self.pool][0] = self.container - self.pool_cont_dict[self.pool][1] = "Updated" - else: - if ((self.test_during_aggregation is True) - and (self.pool_cont_dict[self.pool][1] == "Updated") - and (self.pool_cont_dict[self.pool][3] is None) - and ("-w" in flags)): - # Write to the second container - self.add_container(self.pool, create=False) - self.set_cont_class_properties(oclass) - if self.test_with_checksum is False: - tmp = self.get_object_replica_value(oclass) - rf_value = "rd_fac:{}".format(tmp - 1) - self.update_cont_properties(rf_value) - self.container.create() - self.pool_cont_dict[self.pool][2] = self.container - self.pool_cont_dict[self.pool][3] = "Updated" - else: - self.container = self.pool_cont_dict[self.pool][0] - - def delete_extra_container(self, pool): - """Delete the extra container in the pool. - - Refer prepare_cont_ior_write_read. This method should be called when OSA tests intend to - enable aggregation. - - Args: - pool (TestPool): pool object - """ - self.pool.set_property("reclaim", "time") - extra_container = self.pool_cont_dict[pool][2] - extra_container.destroy() - self.pool_cont_dict[pool][3] = None - - def get_object_replica_value(self, oclass): - """Get the object replica value for an object class. - - Args: - oclass (str): Object Class (eg: RP_2G1,etc) - - Returns: - int: Object replica value - - """ - value = 0 - if "_" in oclass: - replica_list = oclass.split("_") - value = replica_list[1][0] - else: - self.log.info("Wrong Object Class. Cannot split") - return int(value) - - def update_cont_properties(self, cont_prop): - """Update the existing container properties. - - Args: - cont_prop (str): Replace existing container properties with new value - """ - self.container.properties.value = cont_prop - - def set_cont_class_properties(self, oclass="S1"): - """Update the container class to match the IOR/Mdtest object class. - - Fix the rf factor based on object replica value. - Also, remove the redundancy factor for S type object class. - - Args: - oclass (str, optional): Container object class to be set. Defaults to "S1". - """ - self.container.oclass.value = oclass - # Set the container properties properly for S!, S2 class. - # rf should not be set to 1 for S type object class. 
- match = re.search("^S\\d$", oclass) - prop = self.container.properties.value - if match is not None: - prop = prop.replace("rd_fac:1", "rd_fac:0") - else: - tmp = self.get_object_replica_value(oclass) - rf_value = "rd_fac:{}".format(tmp - 1) - prop = prop.replace("rd_fac:1", rf_value) - self.container.properties.value = prop - # Over-write oclass settings if using redundancy factor - # and self.test_with_rf is True. - # This has to be done so that container created doesn't - # use the object class. - if self.test_with_rf is True and \ - "rf" in self.container.properties.value: - self.log.info( - "Detected container redundancy factor: %s", - self.container.properties.value) - self.ior_cmd.dfs_oclass.update(None, "ior.dfs_oclass") - self.ior_cmd.dfs_dir_oclass.update(None, "ior.dfs_dir_oclass") - self.container.oclass.update(None) - - def assert_on_exception(self, out_queue=None): - """Assert on exception while executing an application. - - Args: - out_queue (queue): Check whether the queue is empty. If empty, app (ior, mdtest) didn't - encounter error. - """ - if out_queue is None: - out_queue = self.out_queue - if out_queue.empty(): - pass - else: - exc = out_queue.get(block=False) - out_queue.put(exc) - raise CommandFailure(exc) - - def cleanup_queue(self, out_queue=None): - """Cleanup the existing thread queue. - - Args: - out_queue (queue): Queue to cleanup. - """ - if out_queue is None: - out_queue = self.out_queue - while not out_queue.empty(): - out_queue.get(block=True) - - def run_ior_thread(self, action, oclass, test, single_cont_read=True, fail_on_warning=True, - pool=None): - """Start the IOR thread for either writing or reading data to/from a container. - - Args: - action (str): Start the IOR thread with Read or Write - oclass (str): IOR object class - test (list): IOR test sequence - flags (str): IOR flags - single_cont_read (bool, optional): Always read from the 1st container. Defaults to True. - fail_on_warning (bool, optional): Test terminates for IOR warnings. Defaults to True. - pool (TestPool, optional): Pool to run ior on. Defaults to None. - - """ - # Intermediate (between correct and hack) implementation for allowing a - # pool to be passed in. Needs to be fixed by making the pool argument - # required. - if pool is None: - pool = self.pool - - self.cleanup_queue() - if action == "Write": - flags = self.ior_w_flags - else: - flags = self.ior_r_flags - - # Add a thread for these IOR arguments - process = threading.Thread(target=self.ior_thread, - kwargs={"pool": pool, - "oclass": oclass, - "test": test, - "flags": flags, - "single_cont_read": - single_cont_read, - "fail_on_warning": - fail_on_warning}) - # Launch the IOR thread - process.start() - # Wait for the thread to finish - process.join() - if fail_on_warning and not self.out_queue.empty(): - self.assert_on_exception() - - def ior_thread(self, pool, oclass, test, flags, single_cont_read=True, fail_on_warning=True): - """Start an IOR thread. - - Args: - pool (object): pool handle - oclass (str): IOR object class, container class. - test (list): IOR test sequence - flags (str): IOR flags - single_cont_read (bool, optional): Always read from the 1st container. Defaults to True. - fail_on_warning (bool, optional): Test terminates for IOR warnings. Defaults to True. 
- """ - self.cleanup_queue() - self.pool = pool - self.ior_cmd.get_params(self) - self.ior_cmd.set_daos_params(self.pool, None) - self.log.info("Redundancy Factor : %s", self.test_with_rf) - self.ior_cmd.dfs_oclass.update(oclass) - self.ior_cmd.dfs_dir_oclass.update(oclass) - if single_cont_read is True: - # Prepare the containers created and use in a specific - # way defined in prepare_cont_ior_write. - self.prepare_cont_ior_write_read(oclass, flags) - elif single_cont_read is False and self.container is not None: - # Here self.container is having actual value. Just use it. - self.log.info(self.container) - else: - self.fail("Not supported option on ior_thread") - try: - job_manager = self.get_ior_job_manager_command() - except CommandFailure as err_msg: - self.out_queue.put(err_msg) - self.assert_on_exception() - job_manager.job.dfs_cont.update(self.container.identifier) - self.ior_cmd.transfer_size.update(test[2]) - self.ior_cmd.block_size.update(test[3]) - self.ior_cmd.flags.update(flags) - # Update oclass settings if using redundancy factor - # and self.test_with_rf is True. - if self.test_with_rf is True and "rf" in self.container.properties.value: - self.log.info( - "Detected container redundancy factor: %s", self.container.properties.value) - self.ior_cmd.dfs_oclass.update(None, "ior.dfs_oclass") - self.ior_cmd.dfs_dir_oclass.update(None, "ior.dfs_dir_oclass") - # Run run_ior_with_pool without invoking the pool query method for - # displaying pool space information (display_space=False) - self.run_ior_with_pool(create_pool=False, create_cont=False, - fail_on_warning=fail_on_warning, - display_space=False, - out_queue=self.out_queue) - if fail_on_warning and not self.out_queue.empty(): - self.assert_on_exception() - - def run_mdtest_thread(self, oclass="RP_2G1"): - """Start mdtest thread and wait until thread completes. - - Args: - oclass (str): IOR object class, container class. - """ - # Create container only - self.mdtest_cmd.dfs_destroy.update(False) - create_container = 0 - if self.container is None: - self.add_container(self.pool, create=False) - create_container = 1 - self.mdtest_cmd.dfs_oclass.update(oclass) - self.set_cont_class_properties(oclass) - if self.test_with_checksum is False: - tmp = self.get_object_replica_value(oclass) - rf_value = "rd_fac:{}".format(tmp - 1) - self.update_cont_properties(rf_value) - if create_container == 1: - self.container.create() - job_manager = self.get_mdtest_job_manager_command(self.manager) - job_manager.job.dfs_cont.update(self.container.identifier) - # Add a thread for these IOR arguments - process = threading.Thread(target=self.execute_mdtest) - # Launch the MDtest thread - process.start() - # Wait for the thread to finish - process.join() - if not self.out_queue.empty(): - self.assert_on_exception() +""" + (C) Copyright 2020-2024 Intel Corporation. + (C) Copyright 2025 Hewlett Packard Enterprise Development LP + + SPDX-License-Identifier: BSD-2-Clause-Patent +""" +import queue +import re +import threading +import time + +from avocado import fail_on +from exception_utils import CommandFailure +from general_utils import run_command +from ior_test_base import IorTestBase +from mdtest_test_base import MdtestBase + + +class OSAUtils(MdtestBase, IorTestBase): + """Test Class Description: This test runs daos_server offline drain test cases. 
+ + :avocado: recursive + """ + + def setUp(self): + """Set up for test case.""" + super().setUp() + self.pool_cont_dict = {} + self.container = None + self.obj = None + self.ioreq = None + self.dmg_command = self.get_dmg_command() + self.no_of_dkeys = self.params.get("no_of_dkeys", '/run/dkeys/*', + default=[0])[0] + self.no_of_akeys = self.params.get("no_of_akeys", '/run/akeys/*', + default=[0])[0] + self.record_length = self.params.get("length", '/run/record/*', + default=[0])[0] + self.ior_w_flags = self.params.get("write_flags", '/run/ior/iorflags/*', + default="") + self.ior_r_flags = self.params.get("read_flags", '/run/ior/iorflags/*') + self.server_count = len(self.hostlist_servers) + self.engine_count = self.server_managers[0].get_config_value( + "engines_per_host") + self.out_queue = queue.Queue() + self.dmg_command.exit_status_exception = False + self.test_during_aggregation = False + self.test_during_rebuild = False + self.test_with_checksum = True + # By default, test_with_rf is set to False. + # It is up to individual test to enable it. + self.test_with_rf = False + self.test_with_blank_node = False + self.test_with_snapshot = False + self.test_with_multiple_ranks = False + + @fail_on(CommandFailure) + def assert_on_rebuild_failure(self): + """If the rebuild is not successful, raise assert.""" + rebuild_status = self.pool.get_rebuild_status(True) + self.log.info("Rebuild Status: %s", rebuild_status) + if rebuild_status in ["failed", "scanning", "aborted", "busy"]: + self.fail("Rebuild failed") + + @fail_on(CommandFailure) + def print_and_assert_on_rebuild_failure(self, out, timeout=3): + """Print the out value (daos, dmg, etc) and check for rebuild completion. + + If rebuild does not complete, raise an assertion. + """ + self.log.info(out) + self.pool.wait_for_rebuild_to_start() + self.pool.wait_for_rebuild_to_end(timeout) + self.assert_on_rebuild_failure() + + @fail_on(CommandFailure) + def get_ipaddr_for_rank(self, rank=None): + """Obtain the IPAddress and port number for a particular server rank. + + Args: + rank (int): daos_engine rank. Defaults to None. + + Returns: + ip_addr (str) : IPAddress for the rank. + port_num (str) : Port number for the rank. + """ + output = self.dmg_command.system_query() + members_length = self.server_count * self.engine_count + for index in range(0, members_length): + if rank == int(output["response"]["members"][index]["rank"]): + temp = output["response"]["members"][index]["addr"] + ip_addr = temp.split(":") + temp = output["response"]["members"][index]["fabric_uri"] + port_num = temp.split(":") + return ip_addr[0], port_num[2] + return None, None + + @fail_on(CommandFailure) + def remove_pool_dir(self, ip_addr=None, port_num=None): + """Remove the /mnt/daos[x]//vos-* directory. + + Args: + ip_addr (str): IP address of the daos server. Defaults to None. + port_number (str) : Port number the daos server. + """ + # Create the expected port list + # expected_ports = [port0] - Single engine/server + # expected_ports = [port0, port1] - Two engine/server + expected_ports = [engine_param.get_value("fabric_iface_port") + for engine_param in self.server_managers[-1]. 
+ manager.job.yaml.engine_params] + self.log.info("Expected ports : %s", expected_ports) + if ip_addr is None or port_num is None: + self.log.info("ip_addr : %s port_number: %s", ip_addr, port_num) + self.fail("No IP Address or Port number provided") + else: + if self.engine_count == 1: + self.log.info("Single Engine per Server") + cmd = "/usr/bin/ssh {} -oStrictHostKeyChecking=no \ + sudo rm -rf /mnt/daos/{}/vos-*". \ + format(ip_addr, self.pool.uuid) + elif self.engine_count == 2: + if port_num == str(expected_ports[0]): + port_val = 0 + elif port_num == str(expected_ports[1]): + port_val = 1 + else: + port_val = None # To appease pylint + self.log.info("port_number: %s", port_num) + self.fail("Invalid port number") + cmd = "/usr/bin/ssh {} -oStrictHostKeyChecking=no \ + sudo rm -rf /mnt/daos{}/{}/vos-*". \ + format(ip_addr, port_val, self.pool.uuid) + else: + cmd = None # To appease pylint + self.fail("Not supported engine per server configuration") + run_command(cmd) + + def set_container(self, container): + """Set the OSA utils container object. + + Args: + container (TestContainer): Container object to be used within OSA utils. + """ + self.container = container + + def simple_osa_reintegrate_loop(self, rank, action="exclude", loop_time=100): + """Exclude or drain and reintegrate a rank for a certain amount of time. + + Args: + rank (int): daos server rank. + action (str, optional): "exclude" or "drain". Defaults to "exclude" + loop_time (int, optional): Total time to perform drain/reintegrate operation in a loop. + Defaults to 100. + """ + start_time = 0 + finish_time = 0 + start_time = time.time() + while int(finish_time - start_time) < loop_time: + if action == "exclude": + output = self.pool.exclude(rank) + else: + output = self.pool.drain(rank) + self.print_and_assert_on_rebuild_failure(output) + output = self.pool.reintegrate(rank) + self.print_and_assert_on_rebuild_failure(output) + finish_time = time.time() + + def prepare_cont_ior_write_read(self, oclass, flags): + """Prepare the containers for IOR write and read invocations. + + To enable aggregation: + - Create two containers and read always from first container + Normal usage (use only a single container): + - Create a single container and use the same. + + Args: + oclass (str): IOR object class + flags (str): IOR flags + """ + self.log.info(self.pool_cont_dict) + # If pool is not in the dictionary, + # initialize its container list to None + # {pool : [None, None], [None, None]} + if self.pool not in self.pool_cont_dict: + self.pool_cont_dict[self.pool] = [None] * 4 + # Create container if the pool doesn't have one. + # Otherwise, use the existing container in the pool. 
+ # pool_cont_dict {pool A: [containerA, Updated, + # containerB, Updated], + # pool B : containerA, Updated, + # containerB, None]} + if self.pool_cont_dict[self.pool][0] is None: + self.add_container(self.pool, create=False) + self.set_cont_class_properties(oclass) + if self.test_with_checksum is False: + tmp = self.get_object_replica_value(oclass) + rf_value = "rd_fac:{}".format(tmp - 1) + self.update_cont_properties(rf_value) + self.container.create() + self.pool_cont_dict[self.pool][0] = self.container + self.pool_cont_dict[self.pool][1] = "Updated" + else: + if ((self.test_during_aggregation is True) + and (self.pool_cont_dict[self.pool][1] == "Updated") + and (self.pool_cont_dict[self.pool][3] is None) + and ("-w" in flags)): + # Write to the second container + self.add_container(self.pool, create=False) + self.set_cont_class_properties(oclass) + if self.test_with_checksum is False: + tmp = self.get_object_replica_value(oclass) + rf_value = "rd_fac:{}".format(tmp - 1) + self.update_cont_properties(rf_value) + self.container.create() + self.pool_cont_dict[self.pool][2] = self.container + self.pool_cont_dict[self.pool][3] = "Updated" + else: + self.container = self.pool_cont_dict[self.pool][0] + + def delete_extra_container(self, pool): + """Delete the extra container in the pool. + + Refer prepare_cont_ior_write_read. This method should be called when OSA tests intend to + enable aggregation. + + Args: + pool (TestPool): pool object + """ + self.pool.set_property("reclaim", "time") + extra_container = self.pool_cont_dict[pool][2] + extra_container.destroy() + self.pool_cont_dict[pool][3] = None + + def get_object_replica_value(self, oclass): + """Get the object replica value for an object class. + + Args: + oclass (str): Object Class (eg: RP_2G1,etc) + + Returns: + int: Object replica value + + """ + value = 0 + if "_" in oclass: + replica_list = oclass.split("_") + value = replica_list[1][0] + else: + self.log.info("Wrong Object Class. Cannot split") + return int(value) + + def update_cont_properties(self, cont_prop): + """Update the existing container properties. + + Args: + cont_prop (str): Replace existing container properties with new value + """ + self.container.properties.value = cont_prop + + def set_cont_class_properties(self, oclass="S1"): + """Update the container class to match the IOR/Mdtest object class. + + Fix the rf factor based on object replica value. + Also, remove the redundancy factor for S type object class. + + Args: + oclass (str, optional): Container object class to be set. Defaults to "S1". + """ + self.container.oclass.value = oclass + # Set the container properties properly for S!, S2 class. + # rf should not be set to 1 for S type object class. + match = re.search("^S\\d$", oclass) + prop = self.container.properties.value + if match is not None: + prop = prop.replace("rd_fac:1", "rd_fac:0") + else: + tmp = self.get_object_replica_value(oclass) + rf_value = "rd_fac:{}".format(tmp - 1) + prop = prop.replace("rd_fac:1", rf_value) + self.container.properties.value = prop + # Over-write oclass settings if using redundancy factor + # and self.test_with_rf is True. + # This has to be done so that container created doesn't + # use the object class. 
+ if self.test_with_rf is True and \ + "rf" in self.container.properties.value: + self.log.info( + "Detected container redundancy factor: %s", + self.container.properties.value) + self.ior_cmd.dfs_oclass.update(None, "ior.dfs_oclass") + self.ior_cmd.dfs_dir_oclass.update(None, "ior.dfs_dir_oclass") + self.container.oclass.update(None) + + def assert_on_exception(self, out_queue=None): + """Assert on exception while executing an application. + + Args: + out_queue (queue): Check whether the queue is empty. If empty, app (ior, mdtest) didn't + encounter error. + """ + if out_queue is None: + out_queue = self.out_queue + if out_queue.empty(): + pass + else: + exc = out_queue.get(block=False) + out_queue.put(exc) + raise CommandFailure(exc) + + def cleanup_queue(self, out_queue=None): + """Cleanup the existing thread queue. + + Args: + out_queue (queue): Queue to cleanup. + """ + if out_queue is None: + out_queue = self.out_queue + while not out_queue.empty(): + out_queue.get(block=True) + + def run_ior_thread(self, action, oclass, test, single_cont_read=True, fail_on_warning=True, + pool=None): + """Start the IOR thread for either writing or reading data to/from a container. + + Args: + action (str): Start the IOR thread with Read or Write + oclass (str): IOR object class + test (list): IOR test sequence + flags (str): IOR flags + single_cont_read (bool, optional): Always read from the 1st container. Defaults to True. + fail_on_warning (bool, optional): Test terminates for IOR warnings. Defaults to True. + pool (TestPool, optional): Pool to run ior on. Defaults to None. + + """ + # Intermediate (between correct and hack) implementation for allowing a + # pool to be passed in. Needs to be fixed by making the pool argument + # required. + if pool is None: + pool = self.pool + + self.cleanup_queue() + if action == "Write": + flags = self.ior_w_flags + else: + flags = self.ior_r_flags + + # Add a thread for these IOR arguments + process = threading.Thread(target=self.ior_thread, + kwargs={"pool": pool, + "oclass": oclass, + "test": test, + "flags": flags, + "single_cont_read": + single_cont_read, + "fail_on_warning": + fail_on_warning}) + # Launch the IOR thread + process.start() + # Wait for the thread to finish + process.join() + if fail_on_warning and not self.out_queue.empty(): + self.assert_on_exception() + + def ior_thread(self, pool, oclass, test, flags, single_cont_read=True, fail_on_warning=True): + """Start an IOR thread. + + Args: + pool (object): pool handle + oclass (str): IOR object class, container class. + test (list): IOR test sequence + flags (str): IOR flags + single_cont_read (bool, optional): Always read from the 1st container. Defaults to True. + fail_on_warning (bool, optional): Test terminates for IOR warnings. Defaults to True. + """ + self.cleanup_queue() + self.pool = pool + self.ior_cmd.get_params(self) + self.ior_cmd.set_daos_params(self.pool, None) + self.log.info("Redundancy Factor : %s", self.test_with_rf) + self.ior_cmd.dfs_oclass.update(oclass) + self.ior_cmd.dfs_dir_oclass.update(oclass) + if single_cont_read is True: + # Prepare the containers created and use in a specific + # way defined in prepare_cont_ior_write. + self.prepare_cont_ior_write_read(oclass, flags) + elif single_cont_read is False and self.container is not None: + # Here self.container is having actual value. Just use it. 
+ self.log.info(self.container) + else: + self.fail("Not supported option on ior_thread") + try: + job_manager = self.get_ior_job_manager_command() + except CommandFailure as err_msg: + self.out_queue.put(err_msg) + self.assert_on_exception() + job_manager.job.dfs_cont.update(self.container.identifier) + self.ior_cmd.transfer_size.update(test[2]) + self.ior_cmd.block_size.update(test[3]) + self.ior_cmd.flags.update(flags) + # Update oclass settings if using redundancy factor + # and self.test_with_rf is True. + if self.test_with_rf is True and "rf" in self.container.properties.value: + self.log.info( + "Detected container redundancy factor: %s", self.container.properties.value) + self.ior_cmd.dfs_oclass.update(None, "ior.dfs_oclass") + self.ior_cmd.dfs_dir_oclass.update(None, "ior.dfs_dir_oclass") + # Run run_ior_with_pool without invoking the pool query method for + # displaying pool space information (display_space=False) + self.run_ior_with_pool(create_pool=False, create_cont=False, + fail_on_warning=fail_on_warning, + display_space=False, + out_queue=self.out_queue) + if fail_on_warning and not self.out_queue.empty(): + self.assert_on_exception() + + def run_mdtest_thread(self, oclass="RP_2G1"): + """Start mdtest thread and wait until thread completes. + + Args: + oclass (str): IOR object class, container class. + """ + # Create container only + self.mdtest_cmd.dfs_destroy.update(False) + create_container = 0 + if self.container is None: + self.add_container(self.pool, create=False) + create_container = 1 + self.mdtest_cmd.dfs_oclass.update(oclass) + self.set_cont_class_properties(oclass) + if self.test_with_checksum is False: + tmp = self.get_object_replica_value(oclass) + rf_value = "rd_fac:{}".format(tmp - 1) + self.update_cont_properties(rf_value) + if create_container == 1: + self.container.create() + job_manager = self.get_mdtest_job_manager_command(self.manager) + job_manager.job.dfs_cont.update(self.container.identifier) + # Add a thread for these IOR arguments + process = threading.Thread(target=self.execute_mdtest) + # Launch the MDtest thread + process.start() + # Wait for the thread to finish + process.join() + if not self.out_queue.empty(): + self.assert_on_exception() diff --git a/utils/cq/check_update_copyright.sh b/utils/cq/check_update_copyright.sh index 7b89b47b41b..473045e3c45 100755 --- a/utils/cq/check_update_copyright.sh +++ b/utils/cq/check_update_copyright.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Copyright 2024 Intel Corporation. # Copyright 2025 Hewlett Packard Enterprise Development LP diff --git a/utils/githooks/find_base.sh b/utils/githooks/find_base.sh index 292a02f66e7..b812d28a783 100644 --- a/utils/githooks/find_base.sh +++ b/utils/githooks/find_base.sh @@ -1,6 +1,7 @@ -#!/bin/bash +#!/usr/bin/env bash # /* # * (C) Copyright 2024 Intel Corporation. 
diff --git a/utils/cq/check_update_copyright.sh b/utils/cq/check_update_copyright.sh
index 7b89b47b41b..473045e3c45 100755
--- a/utils/cq/check_update_copyright.sh
+++ b/utils/cq/check_update_copyright.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
 #
 # Copyright 2024 Intel Corporation.
 # Copyright 2025 Hewlett Packard Enterprise Development LP
diff --git a/utils/githooks/find_base.sh b/utils/githooks/find_base.sh
index 292a02f66e7..b812d28a783 100644
--- a/utils/githooks/find_base.sh
+++ b/utils/githooks/find_base.sh
@@ -1,6 +1,7 @@
-#!/bin/bash
+#!/usr/bin/env bash
 # /*
 # * (C) Copyright 2024 Intel Corporation.
+# * (C) Copyright 2025 Hewlett Packard Enterprise Development LP
 # *
 # * SPDX-License-Identifier: BSD-2-Clause-Patent
 # */
diff --git a/utils/githooks/git-version.sh b/utils/githooks/git-version.sh
index 29e141f40c8..c99aa5b534d 100644
--- a/utils/githooks/git-version.sh
+++ b/utils/githooks/git-version.sh
@@ -1,4 +1,9 @@
-#!/bin/bash
+#!/usr/bin/env bash
+#
+# Copyright 2025 Hewlett Packard Enterprise Development LP
+#
+# SPDX-License-Identifier: BSD-2-Clause-Patent
+#
 
 VERSION=$(git --version | sed -ne 's/^[^0-9]*\([[0-9\.]*\).*/\1/p')
 if [ -z "$VERSION" ]; then
diff --git a/utils/githooks/hook_base.sh b/utils/githooks/hook_base.sh
index 7e3a61cc544..b6fa66d73d7 100755
--- a/utils/githooks/hook_base.sh
+++ b/utils/githooks/hook_base.sh
@@ -1,6 +1,7 @@
-#!/bin/bash
+#!/usr/bin/env bash
 #
 # Copyright 2024 Intel Corporation.
+# Copyright 2025 Hewlett Packard Enterprise Development LP
 #
 # SPDX-License-Identifier: BSD-2-Clause-Patent
 #
@@ -41,8 +42,8 @@ run-parts() {
         # don't run vim .swp files
         [ "${i%.sw?}" != "${i}" ] && continue
         skip_item=false
-        for skip in "${skip_list[@]}"; do
-            if [[ "${i}" =~ ${skip} ]]; then
+        for skip in "${skip_list[@]:-}"; do
+            if [[ -n "${skip}" ]] && [[ "${i}" =~ ${skip} ]]; then
                 skip_item=true
                 echo "Skipping ${i}"
                 break
diff --git a/utils/githooks/pre-commit b/utils/githooks/pre-commit
index cab804423fc..78af2ba4c28 100755
--- a/utils/githooks/pre-commit
+++ b/utils/githooks/pre-commit
@@ -1,4 +1,7 @@
-#!/bin/bash
+#!/usr/bin/env bash
+#
+# Copyright 2025 Hewlett Packard Enterprise Development LP
+#
 set -eu
 
 . utils/githooks/hook_base.sh
diff --git a/utils/githooks/pre-commit.d/10-update-copyright.sh b/utils/githooks/pre-commit.d/10-update-copyright.sh
index b88cce8e634..e962e000168 100755
--- a/utils/githooks/pre-commit.d/10-update-copyright.sh
+++ b/utils/githooks/pre-commit.d/10-update-copyright.sh
@@ -1,6 +1,7 @@
-#!/bin/bash
+#!/usr/bin/env bash
 #
 # Copyright 2022-2024 Intel Corporation.
+# Copyright 2025 Hewlett Packard Enterprise Development LP
 #
 # SPDX-License-Identifier: BSD-2-Clause-Patent
 #
diff --git a/utils/githooks/pre-commit.d/20-codespell.sh b/utils/githooks/pre-commit.d/20-codespell.sh
index eece684244e..dcc7bdb6380 100755
--- a/utils/githooks/pre-commit.d/20-codespell.sh
+++ b/utils/githooks/pre-commit.d/20-codespell.sh
@@ -1,6 +1,7 @@
-#!/bin/bash
+#!/usr/bin/env bash
 #
 # Copyright 2024 Intel Corporation.
+# Copyright 2025 Hewlett Packard Enterprise Development LP
 #
 # SPDX-License-Identifier: BSD-2-Clause-Patent
 #
diff --git a/utils/githooks/pre-commit.d/30-Jenkinsfile.sh b/utils/githooks/pre-commit.d/30-Jenkinsfile.sh
index c69f08ffac5..f1f44d80989 100755
--- a/utils/githooks/pre-commit.d/30-Jenkinsfile.sh
+++ b/utils/githooks/pre-commit.d/30-Jenkinsfile.sh
@@ -1,6 +1,7 @@
-#!/bin/bash
+#!/usr/bin/env bash
 #
 # Copyright 2023-2024 Intel Corporation.
+# Copyright 2025 Hewlett Packard Enterprise Development LP
 #
 # SPDX-License-Identifier: BSD-2-Clause-Patent
 #
diff --git a/utils/githooks/pre-commit.d/40-yamllint.sh b/utils/githooks/pre-commit.d/40-yamllint.sh
index 60b62fe86a8..c080edc7447 100755
--- a/utils/githooks/pre-commit.d/40-yamllint.sh
+++ b/utils/githooks/pre-commit.d/40-yamllint.sh
@@ -1,6 +1,7 @@
-#!/bin/bash
+#!/usr/bin/env bash
 #
 # Copyright 2022-2024 Intel Corporation.
+# Copyright 2025 Hewlett Packard Enterprise Development LP
 #
 # SPDX-License-Identifier: BSD-2-Clause-Patent
 #
diff --git a/utils/githooks/pre-commit.d/50-clang-format.sh b/utils/githooks/pre-commit.d/50-clang-format.sh
index 82b725d2624..a20b215ba6c 100755
--- a/utils/githooks/pre-commit.d/50-clang-format.sh
+++ b/utils/githooks/pre-commit.d/50-clang-format.sh
@@ -1,6 +1,7 @@
-#!/bin/bash
+#!/usr/bin/env bash
 #
 # Copyright 2022-2024 Intel Corporation.
+# Copyright 2025 Hewlett Packard Enterprise Development LP
 #
 # SPDX-License-Identifier: BSD-2-Clause-Patent
 #
@@ -21,10 +22,10 @@ if ! command -v clang-format > /dev/null 2>&1; then
     exit 0
 fi
 
-echo "Formatting C files"
-
 # Check version of clang-format, and print a helpful message if it's too old. If the right version
 # is not found then exit.
-./site_scons/site_tools/extra/extra.py || exit 0
+./site_scons/site_tools/extra/extra.py > /dev/null || exit 0
+
+echo "Formatting C files"
 
 git-clang-format --staged src
diff --git a/utils/githooks/pre-commit.d/60-gofmt.sh b/utils/githooks/pre-commit.d/60-gofmt.sh
index 0a702948786..c8cad14f530 100755
--- a/utils/githooks/pre-commit.d/60-gofmt.sh
+++ b/utils/githooks/pre-commit.d/60-gofmt.sh
@@ -1,6 +1,7 @@
-#!/bin/bash
+#!/usr/bin/env bash
 #
 # Copyright 2022-2024 Intel Corporation.
+# Copyright 2025 Hewlett Packard Enterprise Development LP
 #
 # SPDX-License-Identifier: BSD-2-Clause-Patent
 #
diff --git a/utils/githooks/pre-commit.d/70-isort.sh b/utils/githooks/pre-commit.d/70-isort.sh
index 9b3d9fc445a..986e4642fa9 100755
--- a/utils/githooks/pre-commit.d/70-isort.sh
+++ b/utils/githooks/pre-commit.d/70-isort.sh
@@ -1,6 +1,7 @@
-#!/bin/bash
+#!/usr/bin/env bash
 #
 # Copyright 2023-2024 Intel Corporation.
+# Copyright 2025 Hewlett Packard Enterprise Development LP
 #
 # SPDX-License-Identifier: BSD-2-Clause-Patent
 #
@@ -20,17 +21,7 @@ fi
 if ! command -v isort > /dev/null 2>&1; then
     echo "isort not installed. Install isort command to improve pre-commit checks:"
     echo "    python3 -m pip install -r ./utils/cq/requirements.txt"
-    . /etc/os-release
-    if [ "$ID" = "fedora" ]; then
-        echo "  or"
-        echo "  dnf install python3-isort"
-    fi
-    exit 0
 fi
 
 echo "Checking if python imports are sorted"
-if ! echo "$py_files" | xargs -r isort --check-only --jobs 8; then
-    echo "  isort check failed, run 'isort --jobs 8 .' to fix."
-    exit 1
-fi
diff --git a/utils/githooks/pre-commit.d/71-flake.sh b/utils/githooks/pre-commit.d/71-flake.sh
index 082e0f863d8..cf00e8e6388 100755
--- a/utils/githooks/pre-commit.d/71-flake.sh
+++ b/utils/githooks/pre-commit.d/71-flake.sh
@@ -1,6 +1,7 @@
-#!/bin/bash
+#!/usr/bin/env bash
 #
 # Copyright 2022-2024 Intel Corporation.
+# Copyright 2025 Hewlett Packard Enterprise Development LP
 #
 # SPDX-License-Identifier: BSD-2-Clause-Patent
 #
diff --git a/utils/githooks/pre-commit.d/72-pylint.sh b/utils/githooks/pre-commit.d/72-pylint.sh
index 4bff2456a66..047de5ba974 100755
--- a/utils/githooks/pre-commit.d/72-pylint.sh
+++ b/utils/githooks/pre-commit.d/72-pylint.sh
@@ -1,6 +1,7 @@
-#!/bin/bash
+#!/usr/bin/env bash
 #
 # Copyright 2022-2024 Intel Corporation.
+# Copyright 2025 Hewlett Packard Enterprise Development LP
 #
 # SPDX-License-Identifier: BSD-2-Clause-Patent
 #
diff --git a/utils/githooks/pre-commit.d/73-ftest.sh b/utils/githooks/pre-commit.d/73-ftest.sh
index 9eaf4e8253b..0fc7248f08e 100755
--- a/utils/githooks/pre-commit.d/73-ftest.sh
+++ b/utils/githooks/pre-commit.d/73-ftest.sh
@@ -1,6 +1,7 @@
-#!/bin/bash
+#!/usr/bin/env bash
 #
 # Copyright 2024 Intel Corporation.
+# Copyright 2025 Hewlett Packard Enterprise Development LP
 #
 # SPDX-License-Identifier: BSD-2-Clause-Patent
 #
diff --git a/utils/githooks/prepare-commit-msg b/utils/githooks/prepare-commit-msg
index 59aec7c8578..f050ebb9c34 100755
--- a/utils/githooks/prepare-commit-msg
+++ b/utils/githooks/prepare-commit-msg
@@ -1,3 +1,6 @@
-#!/bin/bash
+#!/usr/bin/env bash
+#
+# Copyright 2025 Hewlett Packard Enterprise Development LP
+#
 
 . utils/githooks/hook_base.sh
diff --git a/utils/rpms/packaging/get_release_branch b/utils/rpms/packaging/get_release_branch
index e017cca2071..6ce032d1d27 100755
--- a/utils/rpms/packaging/get_release_branch
+++ b/utils/rpms/packaging/get_release_branch
@@ -1,14 +1,24 @@
-#!/bin/bash
-
+#!/usr/bin/env bash
+#
+# Copyright 2025 Hewlett Packard Enterprise Development LP
+#
+# SPDX-License-Identifier: BSD-2-Clause-Patent
+#
 # find the base branch of the current branch
 # base branches can be master, release/2.4+, release/3+
 # or optionally branches passed into $1
 set -eu -o pipefail
 
 IFS=' ' read -r -a add_bases <<< "${1:-}"
 origin="${ORIGIN:-origin}"
-mapfile -t all_bases < <(echo "master"
-                         git branch -r | sed -ne "/^ $origin\\/release\\/\(2.[4-9]\|[3-9]\)/s/^ $origin\\///p")
-all_bases+=("${add_bases[@]}")
+all_bases=()
+while IFS= read -r base; do
+    all_bases+=("$base")
+done < <(echo "master"
+         git branch -r | sed -ne "/^ $origin\\/release\\/\(2.[4-9]\|[3-9]\)/s/^ $origin\\///p")
+# Keep the optional base branches passed in $1 (guarded so an empty array is safe under set -u)
+if [ "${#add_bases[@]}" -gt 0 ]; then
+    all_bases+=("${add_bases[@]}")
+fi
 TARGET="master"
 min_diff=-1
 for base in "${all_bases[@]}"; do