From 48c22638377e2a0215188bb5fbdd48c21137f83b Mon Sep 17 00:00:00 2001 From: Adam Kupczyk Date: Mon, 8 Jul 2024 12:09:33 +0000 Subject: [PATCH] teuthology/schedule: Add "descr" option A convenience option. When a large batch has many failed tests, "descr" makes it easy to schedule just selected ones. It is as simple as copy-pasting the description of a failed test. teuthology-suite -e akupczyk@redhat.com -vv -c aclamk-testing-ceres-2024-07-01-1011 -p 50 \ --descr "rados/singleton-bluestore/{all/cephtool mon_election/connectivity msgr-failures/none \ msgr/async-v2only objectstore/bluestore-comp-lz4 rados supported-random-distro$/{centos_latest}}" Many tests may be rerun in this fashion; "descr" accepts multiple comma-separated test descriptions. Signed-off-by: Adam Kupczyk --- scripts/suite.py | 2 + teuthology/suite/run.py | 110 +++++++++++++++++++++++------ teuthology/suite/test/test_run_.py | 49 +++++++++++++ 3 files changed, 140 insertions(+), 21 deletions(-) diff --git a/scripts/suite.py b/scripts/suite.py index 77561b7e0..324065365 100644 --- a/scripts/suite.py +++ b/scripts/suite.py @@ -9,6 +9,7 @@ usage: teuthology-suite --help teuthology-suite [-v | -vv ] --suite [options] [...] teuthology-suite [-v | -vv ] --rerun [options] [...] + teuthology-suite [-v | -vv ] --descr [options] [...] Run a suite of ceph integration tests. A suite is a directory containing facets. A facet is a directory containing config snippets. Running a suite @@ -126,6 +127,7 @@ 2/ ... -1/ will schedule all jobs in the suite (many more than once). If specified, this value can be found in results.log. + --descr Schedule a suite based on comma separated list of descriptions. 
def descr_to_yamls(descriptions, suites_path):
    """
    Convert job descriptions into the lists of .yaml files they name.

    A description is a sequence of space-separated fragment names in which a
    brace group shares the text written just before ``{`` as a path prefix
    for everything inside the group, e.g.::

        a/{b c/{d}}  ->  <suites_path>/a/b.yaml, <suites_path>/a/c/d.yaml

    A description without any braces names a single fragment
    (``a/b`` -> ``<suites_path>/a/b.yaml``).

    :param descriptions: string containing a comma-separated list of job
                         descriptions; surrounding whitespace is ignored and
                         empty entries (e.g. from a trailing comma) are
                         skipped.
    :param suites_path:  posix path to the directory containing all suites.
    :returns: list of ``(description, yaml_paths)`` tuples, one per
              description, in input order.
    :raises ValueError: if a description has unbalanced braces.
    """
    def expand_descr(source):
        """
        Expand one description into its list of yaml paths.

        Production rules (explanation, not a formal grammar):
          rule 1 (simplification): "A{B X}" => "AB" "A{X}"
          rule 2 (termination):    "A"      => "<suites_path>/A.yaml"
        """
        yamls = []
        # Stack of accumulated prefixes; the top entry applies to the brace
        # group currently being scanned ("" at the top level).
        prefixes = [""]
        token_start = 0
        for pos, char in enumerate(source):
            if char not in '{} ':
                continue
            token = source[token_start:pos]
            if char == '{':
                # Text before the brace extends the prefix for the group.
                prefixes.append(prefixes[-1] + token)
            else:
                # Both ' ' and '}' terminate the current fragment name.
                if token:
                    yamls.append(f"{suites_path}/{prefixes[-1]}{token}.yaml")
                if char == '}':
                    if len(prefixes) == 1:
                        raise ValueError(
                            f"unbalanced '}}' at position {pos} "
                            f"in description '{source}'")
                    prefixes.pop()
            token_start = pos + 1
        if len(prefixes) > 1:
            raise ValueError(f"missing '}}' in description '{source}'")
        # Rule 2 also applies to a fragment that runs to the end of input,
        # which is the only fragment of a brace-less description.
        token = source[token_start:]
        if token:
            yamls.append(f"{suites_path}/{prefixes[-1]}{token}.yaml")
        return yamls

    result = []
    for raw in descriptions.split(','):
        descr = raw.strip()
        if not descr:
            # An empty entry would otherwise schedule a job with no yamls.
            continue
        result.append((descr, expand_descr(descr)))
    return result
def prepare_configs(self):
    """
    Build the list of job configs for this run.

    With ``--descr`` the configs come from ``descr_to_yamls`` applied to the
    given descriptions; otherwise they come from ``build_matrix`` over the
    suite directory. In both cases the result is passed through
    ``config_merge`` together with the filter options and the seed.

    :returns: list of whatever ``config_merge`` yields for the selected jobs.
    """
    suite_name = self.base_config.suite or "rerun"
    suites_path = os.path.normpath(os.path.join(
        self.suite_repo_path,
        self.args.suite_relpath,
        'suites'
    ))
    log.debug('Suites in %s' % (suites_path))
    if self.args.descr:
        log.debug(f'Rerun by description in {suites_path}')
        configs = descr_to_yamls(self.args.descr, suites_path)
        # No single suite applies when jobs are picked by description.
        use_suite_name = None
        generated = len(configs)
        # Descriptions are resolved relative to suites_path, so that is the
        # directory worth reporting here.
        log.info(f'Rerun from description in {suites_path} generated {generated} jobs')
    else:
        # Only the matrix build needs the per-suite directory.
        suite_path = os.path.normpath(os.path.join(
            suites_path,
            suite_name.replace(':', '/'),
        ))
        log.debug('Suite %s in %s' % (suite_name, suites_path))
        log.debug(f"subset = {self.args.subset}")
        log.debug(f"no_nested_subset = {self.args.no_nested_subset}")
        configs = build_matrix(suite_path,
                               subset=self.args.subset,
                               no_nested_subset=self.args.no_nested_subset,
                               seed=self.args.seed)
        use_suite_name = self.base_config.suite
        generated = len(configs)
        log.info(f'Suite {suite_name} in {suites_path} generated {generated} jobs (not yet filtered or merged)')
    configs = list(config_merge(configs,
                                filter_in=self.args.filter_in,
                                filter_out=self.args.filter_out,
                                filter_all=self.args.filter_all,
                                filter_fragments=self.args.filter_fragments,
                                seed=self.args.seed,
                                suite_name=use_suite_name))
    return configs
Returns the number of jobs scheduled. @@ -574,29 +657,14 @@ def schedule_suite(self): log.debug("Using '%s' as an arch" % arch) else: arch = util.get_arch(self.base_config.machine_type) - suite_name = self.base_config.suite - suite_path = os.path.normpath(os.path.join( + suite_name = self.base_config.suite or "rerun" + suites_path = os.path.normpath(os.path.join( self.suite_repo_path, self.args.suite_relpath, - 'suites', - self.base_config.suite.replace(':', '/'), + 'suites' )) - log.debug('Suite %s in %s' % (suite_name, suite_path)) - log.debug(f"subset = {self.args.subset}") - log.debug(f"no_nested_subset = {self.args.no_nested_subset}") - configs = build_matrix(suite_path, - subset=self.args.subset, - no_nested_subset=self.args.no_nested_subset, - seed=self.args.seed) + configs = self.prepare_configs() generated = len(configs) - log.info(f'Suite {suite_name} in {suite_path} generated {generated} jobs (not yet filtered or merged)') - configs = list(config_merge(configs, - filter_in=self.args.filter_in, - filter_out=self.args.filter_out, - filter_all=self.args.filter_all, - filter_fragments=self.args.filter_fragments, - seed=self.args.seed, - suite_name=suite_name)) if self.args.dry_run: log.debug("Base job config:\n%s" % self.base_config) @@ -696,7 +764,7 @@ def schedule_suite(self): total_count *= self.args.num log.info( 'Suite %s in %s scheduled %d jobs.' 
def test_dupa(self):
    """
    Check that run.descr_to_yamls expands descriptions into yaml paths.

    Covers a single nested brace group, a bare fragment next to prefixed
    ones, and two comma-separated descriptions in one argument.
    """
    # NOTE(review): consider renaming to test_descr_to_yamls so the test
    # name says what is being verified.
    suites = "/cephfs/github.com_ceph_build/qa/suites"
    descr = "rados/objectstore/{backends/objectstore-bluestore-b supported-random-distro$/{ubuntu_latest}}"
    assert run.descr_to_yamls(descr, suites) == [
        (descr,
         ['/cephfs/github.com_ceph_build/qa/suites/rados/objectstore/backends/objectstore-bluestore-b.yaml',
          '/cephfs/github.com_ceph_build/qa/suites/rados/objectstore/supported-random-distro$/ubuntu_latest.yaml']),
    ]

    suites = "/cephfs/home/akupczyk/src/git.ceph.com_ceph-c_aclamk-testing/qa/suites"
    descr = "rados/singleton-nomsgr/{all/health-warnings mon_election/connectivity rados supported-random-distro$/{ubuntu_latest}}"
    assert run.descr_to_yamls(descr, suites) == [
        (descr,
         ['/cephfs/home/akupczyk/src/git.ceph.com_ceph-c_aclamk-testing/qa/suites/rados/singleton-nomsgr/all/health-warnings.yaml',
          '/cephfs/home/akupczyk/src/git.ceph.com_ceph-c_aclamk-testing/qa/suites/rados/singleton-nomsgr/mon_election/connectivity.yaml',
          '/cephfs/home/akupczyk/src/git.ceph.com_ceph-c_aclamk-testing/qa/suites/rados/singleton-nomsgr/rados.yaml',
          '/cephfs/home/akupczyk/src/git.ceph.com_ceph-c_aclamk-testing/qa/suites/rados/singleton-nomsgr/supported-random-distro$/ubuntu_latest.yaml']),
    ]

    suites = "/cephfs/home/akupczyk/src/git.ceph.com_ceph-c_squid/qa/suites"
    first = "rados/cephadm/osds/{0-distro/centos_9.stream_runc 0-nvme-loop 1-start 2-ops/rm-zap-flag}"
    second = "rados/standalone/{supported-random-distro$/{ubuntu_latest} workloads/scrub}"
    assert run.descr_to_yamls(first + "," + second, suites) == [
        (first,
         ['/cephfs/home/akupczyk/src/git.ceph.com_ceph-c_squid/qa/suites/rados/cephadm/osds/0-distro/centos_9.stream_runc.yaml',
          '/cephfs/home/akupczyk/src/git.ceph.com_ceph-c_squid/qa/suites/rados/cephadm/osds/0-nvme-loop.yaml',
          '/cephfs/home/akupczyk/src/git.ceph.com_ceph-c_squid/qa/suites/rados/cephadm/osds/1-start.yaml',
          '/cephfs/home/akupczyk/src/git.ceph.com_ceph-c_squid/qa/suites/rados/cephadm/osds/2-ops/rm-zap-flag.yaml']),
        (second,
         ['/cephfs/home/akupczyk/src/git.ceph.com_ceph-c_squid/qa/suites/rados/standalone/supported-random-distro$/ubuntu_latest.yaml',
          '/cephfs/home/akupczyk/src/git.ceph.com_ceph-c_squid/qa/suites/rados/standalone/workloads/scrub.yaml']),
    ]