Merge pull request #315 from perezjosibm/wip.autom.utest.gen

Automatic unit test generator

perezjosibm authored Sep 23, 2024
2 parents 8f0295f + 03f38e8 commit 9a3b030

Showing 26 changed files with 4,232 additions and 93 deletions.
2 changes: 1 addition & 1 deletion benchmark/benchmark.py
@@ -27,7 +27,7 @@ def __init__(self, archive_dir, cluster, config):
self.run_dir = os.path.join(settings.cluster.get('tmp_dir'),
'{:0>8}'.format(config.get('iteration')),
self.getclass())
self.osd_ra = config.get('osd_ra', None)
self.osd_ra = config.get('osd_ra', '0')
self.cmd_path = ''
self.valgrind = config.get('valgrind', None)
self.cmd_path_full = ''
5 changes: 3 additions & 2 deletions benchmark/getput.py
@@ -36,8 +36,6 @@ def __init__(self, archive_dir, cluster, config):
self.subuser = '%s:swift' % self.user
self.key = config.get('key', 'vzCEkuryfn060dfee4fgQPqFrncKEIkh3ZcdOANY') # dummy key from ceph radosgw docs
self.auth_urls = config.get('auth', self.cluster.get_auth_urls())
self.cleanup()
self.cleandir()

def exists(self):
if os.path.exists(self.out_dir):
@@ -49,6 +47,9 @@ def exists(self):
def initialize(self):
super(Getput, self).initialize()

# Set and clear dir
self.cleanup()
self.cleandir()
# create the user and key
self.cluster.add_swift_user(self.user, self.subuser, self.key)

21 changes: 16 additions & 5 deletions cluster/ceph.py
@@ -91,8 +91,8 @@ def postprocess(self):


class Ceph(Cluster):
def __init__(self, config):
super(Ceph, self).__init__(config)
def _set_default_attributes(self, config):
""" Factorised method to reuse for mock init and unit tests"""
self.health_wait = config.get('health_wait', 5)
self.ceph_osd_cmd = config.get('ceph-osd_cmd', '/usr/bin/ceph-osd')
self.ceph_mon_cmd = config.get('ceph-mon_cmd', '/usr/bin/ceph-mon')
@@ -141,9 +141,6 @@ def __init__(self, config):
self.cur_ruleset = 1
self.idle_duration = config.get('idle_duration', 0)
self.use_existing = config.get('use_existing', True)
self.stoprequest = threading.Event()
self.haltrequest = threading.Event()
self.startiorequest = threading.Event()

self.urls = []
self.auth_urls = []
@@ -155,6 +152,19 @@
self.prefill_recov_time = 0
self.recov_pool_name = ''

def __init__(self, config, _init_threads=True):
super(Ceph, self).__init__(config)
self._set_default_attributes(config)
if _init_threads:
self.stoprequest = threading.Event()
self.haltrequest = threading.Event()
self.startiorequest = threading.Event()

@classmethod
def mockinit(cls, config):
"""Only used by serialise_benchmark.py -- do not call in production code"""
return cls(config, _init_threads=False)

def initialize(self):
# Reset the rulesets
self.ruleset_map = {}
@@ -932,6 +942,7 @@ def make_rgw_pools(self):
self.mkpool('default.rgw.buckets.index', rgw_pools.get('buckets_index', 'default'), 'rgw')
self.mkpool('default.rgw.buckets.data', rgw_pools.get('buckets_data', 'default'), 'rgw')


class RecoveryTestThreadBlocking(threading.Thread):
def __init__(self, config, cluster, callback, stoprequest, haltrequest):
threading.Thread.__init__(self)
1 change: 1 addition & 0 deletions cluster/cluster.py
@@ -5,6 +5,7 @@ def __init__(self, config):
self.mnt_dir = config.get('mnt_dir', "%s/%s" % (base_tmp, 'mnt'))
self.tmp_dir = "%s/%s" % (base_tmp, config.get('clusterid'))
self.archive_dir = "%s/%s" % (config.get('archive_dir'), config.get('clusterid'))
self.tmp_conf = config.get('tmp_conf', '/tmp/cbt')

def get_mnt_dir(self):
return self.mnt_dir
51 changes: 33 additions & 18 deletions common.py
@@ -1,3 +1,6 @@
"""
Common classes to wrap around pdsh (parallel shell)
"""
import errno
import logging
import os
@@ -9,17 +12,40 @@

logger = logging.getLogger("cbt")

class Localhost(object):
"""
This class encapsulates a single dictionary with the information of the localhost
"""
def __init__(self):
self.local_fqdn = get_fqdn_local()
self.local_hostname = socket.gethostname()
self.local_short_hostname = self.local_hostname.split('.')[0]
self.local_list = ('localhost', self.local_fqdn, self.local_hostname, self.local_short_hostname)

def is_localhost(self, node):
""" Returns true if the name refers to the local host """
if node in self.local_list:
return node
return None

# Global singleton instance of Localhost
SINGLETON_LOCALHOST = None
def getLocalhost(node):
global SINGLETON_LOCALHOST
if SINGLETON_LOCALHOST is None:
SINGLETON_LOCALHOST = Localhost()
return SINGLETON_LOCALHOST.is_localhost(node)

def join_nostr(command):
if isinstance(command, list):
return ' '.join(command)
return command

# this class overrides the communicate() method to check the return code and
# throw an exception if return code is not OK


class CheckedPopen(object):
"""
This class overrides the communicate() method to check the return code and
throw an exception if return code is not OK
"""
UNINIT = -720
OK = 0

@@ -106,7 +132,7 @@ def expanded_node_list(nodes):
# logger.info("full list of hosts: %s" % str(full_node_list))
return node_list


# Define an auxiliary method to sanitize the list of nodes, once
def get_localnode(nodes):
# Similarly to `expanded_node_list(nodes)` we assume the passed nodes
# param is always string. This is justified as the callers use `nodes`
@@ -116,18 +142,7 @@ def get_localnode(nodes):
nodes_list = expanded_node_list(nodes)
if len(nodes_list) < 1:
return None

local_fqdn = get_fqdn_local()
local_hostname = socket.gethostname()
local_short_hostname = local_hostname.split('.')[0]

remote_host = settings.host_info(nodes_list[0])['host']
#logger.debug('remote_host=%s, local_fqdn=%s local_hostname=%s local_short_hostname=%s'
# % (remote_host, str(local_fqdn), str(local_hostname), str(local_short_hostname) ))
if remote_host in ('localhost', local_fqdn, local_hostname, local_short_hostname):
return remote_host
return None

return getLocalhost(nodes_list[0])

def sh(local_node, command, continue_if_error=True):
return CheckedPopenLocal(local_node, join_nostr(command),
@@ -144,7 +159,7 @@ def pdsh(nodes, command, continue_if_error=True):
env = {}
if pdsh_ssh_args:
env = {'PDSH_SSH_ARGS':pdsh_ssh_args}
# -f: fan out n nodes, -R rcmd name, -w target node list
# -f: fan out n nodes, -R rcmd name (ssh by default), -w target node list
args = [pdsh_cmd, '-f', str(len(expanded_node_list(nodes))), '-R', 'ssh', '-w', nodes, join_nostr(command)]
# -S means pdsh fails if any host fails
if not continue_if_error:
81 changes: 81 additions & 0 deletions docs/AutomaticUnitTestGeneration.md
@@ -0,0 +1,81 @@
# tools/serialise_benchmark.py -- Automatic Unit Test Generation

## Description:

This is a standalone tool to generate unit tests for CBT.

The execution of the script produces as output:

1. a new baseline, tools/baseline.json, which is a serialisation of each of the Benchmark class instances,
2. a new set of tests/test_bm{benchmark}.py files, each consisting of a set of sanity unit tests.

## Requirements:

The Python modules pytest and pytest-xdist should be installed on the machine that will run the tool; this can be the same machine as the one that drives CBT.
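
If they are not already present, one typical way to install them is via pip (an assumption; use whichever package management your distribution prefers):

```bash
# python3 -m pip install pytest pytest-xdist
```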

## Usage:

The following is an example of the execution of the script:

```bash
# python3 tools/serialise_benchmark.py
```
An example of the expected normal output is shown below.

![cbt_utests_gen](cbt_utests_gen.png)

This creates (or updates, if they already exist) the set of unit tests for the supported benchmarks.
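
For instance, the generated modules can be listed; they follow the tests/test_bm{benchmark}.py naming pattern (the exact set depends on the benchmarks supported in your checkout):

```bash
# ls tests/test_bm*.py
```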

## Execution of unit tests:

The unit tests can be executed from the command line as follows:

```bash
# python3 -m pytest -p no:cacheprovider tests/
```
An example output showing a successful execution:

![cbt_utests_run](cbt_utests_run.png)

Note: the tests skipped above require an environment variable to be defined to identify the target nodes
for exercising pdsh.

The following is an example of the environment settings needed to execute the pdsh tests:

```bash
# export CBT_TEST_NODES=root@ceph2,root@ceph4
# export PDSH_SSH_ARGS_APPEND="-p 8023 -o StrictHostKeyChecking=no -v -E /tmp/ssh.out"
```
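
With those variables exported, re-running the same pytest command shown earlier also executes the previously skipped pdsh tests:

```bash
# python3 -m pytest -p no:cacheprovider tests/
```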

## Generation of Unit tests

The main idea is the concept of **referential transparency** (see for example
[ref_transparency](https://stackoverflow.com/questions/210835/what-is-referential-transparency)). In the functional
programming paradigm it means that, given a function and an input value, you always receive the same output. The
test generator takes advantage of this, since the constructors of the Benchmark classes should always produce
instances with the same initial state. The Benchmark class in CBT expects as argument an object loaded from a
.yaml file (the test plan, which includes a Cluster type object). If we provide a fixed, minimal cluster object to
the constructor of the Benchmark class, we obtain an _invariant_ that we can use to test that each attribute of
the Benchmark classes has the same value across runs.

In other words, each constructor of a CBT Benchmark class behaves like a function: given the same fixed cluster
instance as argument, it always produces object instances initialised with the same values.

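A minimal sketch of this invariant is shown below (the helper names and the attribute filter are illustrative,
not the generator's actual code; the constructor signature follows benchmark/benchmark.py):

```python
# Sketch only: illustrates the determinism assumption the generator relies on.
# `fixed_cluster` and `fixed_config` stand for the fixed minimal cluster and
# test plan (e.g. loaded from tools/invariant.yaml); the names are illustrative.

def snapshot(instance):
    """Serialisable snapshot of an instance's simple attributes (as recorded in baseline.json)."""
    return {k: v for k, v in vars(instance).items()
            if isinstance(v, (str, int, float, bool, list, dict, type(None)))}

def constructor_is_referentially_transparent(benchmark_cls, fixed_cluster, fixed_config):
    """True when two constructions from the same fixed inputs yield the same initial state."""
    first = snapshot(benchmark_cls("/tmp/archive", fixed_cluster, fixed_config))
    second = snapshot(benchmark_cls("/tmp/archive", fixed_cluster, fixed_config))
    return first == second
```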

* For each supported Benchmark class, the tool constructs a serialisation of the object instance and saves it in
tools/baseline.json.
* To detect tampering, an md5sum of the contents of the .json file is calculated.
* For each supported Benchmark class, the tool uses a boilerplate code template to produce unit tests. Each unit test verifies that a supported attribute of the benchmark class is initialised as recorded in baseline.json.
* When executed, the unit tests first perform a sanity check to ensure that baseline.json has not changed since the
unit tests were generated, and then verify each attribute of each Benchmark class. This detects whether attributes
have been changed, replaced or deleted, which is especially useful for catching regressions during code
refactoring. (A sketch of this check is shown after the list.)

## Recommended workflow


* Before starting a code refactoring effort, run the unit tests: they should all pass as shown above.
* Make the intended code change -- for example, remove one of the benchmark class modules, add new attributes, or
delete some existing attributes.
* Run the unit tests: some should now fail, indicating attributes that existed previously but are missing from the
current benchmark class module.
* Run the tool serialise_benchmark.py. This will regenerate the baseline.json and the unit tests.
* Run the unit tests: they should now all pass.
* Iterate if required.
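
Condensed into commands, the loop above looks roughly as follows (reusing the invocations shown earlier):

```bash
# python3 -m pytest -p no:cacheprovider tests/   # 1. all tests pass before refactoring
# <make the intended code change>
# python3 -m pytest -p no:cacheprovider tests/   # 2. failures pinpoint changed or removed attributes
# python3 tools/serialise_benchmark.py           # 3. regenerate baseline.json and the unit tests
# python3 -m pytest -p no:cacheprovider tests/   # 4. all tests pass again
```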
Binary file added docs/cbt_utests_gen.png
Binary file added docs/cbt_utests_run.png
20 changes: 20 additions & 0 deletions settings.py
@@ -142,3 +142,23 @@ def uniquenodes(nodes):

def shutdown(message):
sys.exit(message)


def mock_initialize(config_file="tools/invariant.yaml"):
""" Auxiliary method only to be used from serialise_benchmarks.py"""
global common, cluster, client_endpoints, benchmarks, monitoring_profiles
config = {}
try:
with open(config_file) as f:
config = yaml.safe_load(f)
except IOError as e:
raise argparse.ArgumentTypeError(str(e))

common = config.get('common', {})
cluster = config.get('cluster', {})
client_endpoints = config.get('client_endpoints', {})
benchmarks = config.get('benchmarks', {})
monitoring_profiles = config.get('monitoring_profiles', dict(collectl={}))
# Set some defaults required
cluster['tmp_dir'] = '/tmp/cbt.XYZ'
cluster['osd_ra'] = '0'
31 changes: 11 additions & 20 deletions tests/test_benchmarkfactory.py
@@ -1,16 +1,15 @@
import unittest
""" Unit tests for the Benchmarkfactory class """

import unittest
import benchmarkfactory
from log_support import setup_loggers


class TestBenchmarkFactory(unittest.TestCase):
""" Sanity tests for Benchmarkfactory """
def test_permutations_1(self):
config = dict(
x=12,
y=True,
z={1: 2},
t=[1, 2, "4"]
)
""" Basic sanity permutations """
config = {"x": 12, "y": True, "z": {1: 2}, "t": [1, 2, 4]}
cfgs = list(benchmarkfactory.all_configs(config))
self.assertEqual(len(cfgs), 3)
self.assertEqual([dict] * 3, list(map(type, cfgs)))
@@ -24,14 +23,8 @@ def test_permutations_1(self):
self.assertEqual(sorted(tvals), sorted(config['t']))

def test_permutations_2(self):
config = dict(
x=12,
y=True,
z={1: 2},
t=[1, 2, "4"],
j=[7, True, "gg"]
)

""" Basic sanity permutations """
config = {"x": 12, "y": True, "z": {1: 2}, "t": [1, 2, 4], "j": [7, True, 'gg']}
cfgs = list(benchmarkfactory.all_configs(config))
self.assertEqual(len(cfgs), 9)
self.assertEqual([dict] * 9, list(map(type, cfgs)))
@@ -48,14 +41,12 @@ def test_permutations_2(self):
self.assertEqual(1, tjvals.count((tval, jval)))

def test_permutations_0(self):
config = dict(
x=12,
y=True,
z={1: 2},
)
""" Basic sanity permutations """
config = {"x": 12, "y": True, "z": {1: 2}}
cfgs = list(benchmarkfactory.all_configs(config))
self.assertEqual(len(cfgs), 1)
self.assertEqual(cfgs[0], config)

if __name__ == '__main__':
setup_loggers(log_fname='/tmp/cbt-utest.log')
unittest.main()