Merge pull request #315 from perezjosibm/wip.autom.utest.gen

Automatic unit test generator

perezjosibm authored Sep 23, 2024
2 parents 8f0295f + 03f38e8 commit 9a3b030

Showing 26 changed files with 4,232 additions and 93 deletions.
2 changes: 1 addition & 1 deletion benchmark/benchmark.py
@@ -27,7 +27,7 @@ def __init__(self, archive_dir, cluster, config):
self.run_dir = os.path.join(settings.cluster.get('tmp_dir'),
'{:0>8}'.format(config.get('iteration')),
self.getclass())
self.osd_ra = config.get('osd_ra', None)
self.osd_ra = config.get('osd_ra', '0')
self.cmd_path = ''
self.valgrind = config.get('valgrind', None)
self.cmd_path_full = ''
5 changes: 3 additions & 2 deletions benchmark/getput.py
@@ -36,8 +36,6 @@ def __init__(self, archive_dir, cluster, config):
self.subuser = '%s:swift' % self.user
self.key = config.get('key', 'vzCEkuryfn060dfee4fgQPqFrncKEIkh3ZcdOANY') # dummy key from ceph radosgw docs
self.auth_urls = config.get('auth', self.cluster.get_auth_urls())
self.cleanup()
self.cleandir()

def exists(self):
if os.path.exists(self.out_dir):
@@ -49,6 +47,9 @@ def exists(self):
def initialize(self):
super(Getput, self).initialize()

# Set and clear dir
self.cleanup()
self.cleandir()
# create the user and key
self.cluster.add_swift_user(self.user, self.subuser, self.key)

21 changes: 16 additions & 5 deletions cluster/ceph.py
@@ -91,8 +91,8 @@ def postprocess(self):


class Ceph(Cluster):
def __init__(self, config):
super(Ceph, self).__init__(config)
def _set_default_attributes(self, config):
""" Factorised method to reuse for mock init and unit tests"""
self.health_wait = config.get('health_wait', 5)
self.ceph_osd_cmd = config.get('ceph-osd_cmd', '/usr/bin/ceph-osd')
self.ceph_mon_cmd = config.get('ceph-mon_cmd', '/usr/bin/ceph-mon')
@@ -141,9 +141,6 @@ def __init__(self, config):
self.cur_ruleset = 1
self.idle_duration = config.get('idle_duration', 0)
self.use_existing = config.get('use_existing', True)
self.stoprequest = threading.Event()
self.haltrequest = threading.Event()
self.startiorequest = threading.Event()

self.urls = []
self.auth_urls = []
@@ -155,6 +152,19 @@
self.prefill_recov_time = 0
self.recov_pool_name = ''

def __init__(self, config, _init_threads=True):
super(Ceph, self).__init__(config)
self._set_default_attributes(config)
if _init_threads:
self.stoprequest = threading.Event()
self.haltrequest = threading.Event()
self.startiorequest = threading.Event()

@classmethod
def mockinit(cls, config):
"""Only used by serialise_benchmark.py -- do not call in production code"""
return cls(config, _init_threads=False)

def initialize(self):
# Reset the rulesets
self.ruleset_map = {}
@@ -932,6 +942,7 @@ def make_rgw_pools(self):
self.mkpool('default.rgw.buckets.index', rgw_pools.get('buckets_index', 'default'), 'rgw')
self.mkpool('default.rgw.buckets.data', rgw_pools.get('buckets_data', 'default'), 'rgw')


class RecoveryTestThreadBlocking(threading.Thread):
def __init__(self, config, cluster, callback, stoprequest, haltrequest):
threading.Thread.__init__(self)
1 change: 1 addition & 0 deletions cluster/cluster.py
@@ -5,6 +5,7 @@ def __init__(self, config):
self.mnt_dir = config.get('mnt_dir', "%s/%s" % (base_tmp, 'mnt'))
self.tmp_dir = "%s/%s" % (base_tmp, config.get('clusterid'))
self.archive_dir = "%s/%s" % (config.get('archive_dir'), config.get('clusterid'))
self.tmp_conf = config.get('tmp_conf', '/tmp/cbt')

def get_mnt_dir(self):
return self.mnt_dir
51 changes: 33 additions & 18 deletions common.py
@@ -1,3 +1,6 @@
"""
Common classes to wrap around pdsh (parallel shell)
"""
import errno
import logging
import os
@@ -9,17 +12,40 @@

logger = logging.getLogger("cbt")

class Localhost(object):
"""
This class encapsulates a single dictionary with the information of the localhost
"""
def __init__(self):
self.local_fqdn = get_fqdn_local()
self.local_hostname = socket.gethostname()
self.local_short_hostname = self.local_hostname.split('.')[0]
self.local_list = ('localhost', self.local_fqdn, self.local_hostname, self.local_short_hostname)

def is_localhost(self, node):
""" Returns true if the name refers to the local host """
if node in self.local_list:
return node
return None

# Global singleton instance of Localhost
SINGLETON_LOCALHOST = None
def getLocalhost(node):
global SINGLETON_LOCALHOST
if SINGLETON_LOCALHOST is None:
SINGLETON_LOCALHOST = Localhost()
return SINGLETON_LOCALHOST.is_localhost(node)

def join_nostr(command):
if isinstance(command, list):
return ' '.join(command)
return command

# this class overrides the communicate() method to check the return code and
# throw an exception if return code is not OK


class CheckedPopen(object):
"""
This class overrides the communicate() method to check the return code and
throw an exception if return code is not OK
"""
UNINIT = -720
OK = 0

@@ -106,7 +132,7 @@ def expanded_node_list(nodes):
# logger.info("full list of hosts: %s" % str(full_node_list))
return node_list


# Define an auxiliary method to sanitize the list of nodes, once
def get_localnode(nodes):
# Similarly to `expanded_node_list(nodes)` we assume the passed nodes
# param is always string. This is justified as the callers use `nodes`
@@ -116,18 +142,7 @@ def get_localnode(nodes):
nodes_list = expanded_node_list(nodes)
if len(nodes_list) < 1:
return None

local_fqdn = get_fqdn_local()
local_hostname = socket.gethostname()
local_short_hostname = local_hostname.split('.')[0]

remote_host = settings.host_info(nodes_list[0])['host']
#logger.debug('remote_host=%s, local_fqdn=%s local_hostname=%s local_short_hostname=%s'
# % (remote_host, str(local_fqdn), str(local_hostname), str(local_short_hostname) ))
if remote_host in ('localhost', local_fqdn, local_hostname, local_short_hostname):
return remote_host
return None

return getLocalhost(nodes_list[0])

def sh(local_node, command, continue_if_error=True):
return CheckedPopenLocal(local_node, join_nostr(command),
@@ -144,7 +159,7 @@ def pdsh(nodes, command, continue_if_error=True):
env = {}
if pdsh_ssh_args:
env = {'PDSH_SSH_ARGS':pdsh_ssh_args}
# -f: fan out n nodes, -R rcmd name, -w target node list
# -f: fan out n nodes, -R rcmd name (ssh by default), -w target node list
args = [pdsh_cmd, '-f', str(len(expanded_node_list(nodes))), '-R', 'ssh', '-w', nodes, join_nostr(command)]
# -S means pdsh fails if any host fails
if not continue_if_error:
81 changes: 81 additions & 0 deletions docs/AutomaticUnitTestGeneration.md
@@ -0,0 +1,81 @@
# tools/serialise_benchmark.py -- Automatic Unit Test Generation

## Description:

This is a standalone tool to generate unit tests for CBT.

The execution of the script produces as output:

1. a new baseline, tools/baseline.json, which is a serialisation of each of the Benchmark class instances,
2. a new set of tests/test_bm{benchmark}.py files, each consisting of a set of sanity unit tests.

## Requirements:

The Python modules pytest and pytest-xdist should be installed on the machine that will run the tool; this can be the same machine as the one that drives CBT.
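
If they are not already present, one typical way to install them is via pip (an assumption; use whichever package management your distribution prefers):

```bash
# python3 -m pip install pytest pytest-xdist
```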

## Usage:

The following is an example of the execution of the script:

```bash
# python3 tools/serialise_benchmark.py
```
An example of the expected normal output is shown below.

![cbt_utests_gen](cbt_utests_gen.png)

This creates (or updates, if they already exist) the set of unit tests for the supported benchmarks.
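
For instance, the generated modules can be listed; they follow the tests/test_bm{benchmark}.py naming pattern (the exact set depends on the benchmarks supported in your checkout):

```bash
# ls tests/test_bm*.py
```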

## Execution of unit tests:

The unit tests can be executed from the command line as follows:

```bash
# python3 -m pytest -p no:cacheprovider tests/
```
An example output showing a successful execution:

![cbt_utests_run](cbt_utests_run.png)

Note: the tests skipped above require an environment variable to be defined to identify the target nodes
for exercising pdsh.

The following is an example of the environment settings needed to execute the pdsh tests:

```bash
# export CBT_TEST_NODES=root@ceph2,root@ceph4
# export PDSH_SSH_ARGS_APPEND="-p 8023 -o StrictHostKeyChecking=no -v -E /tmp/ssh.out"
```
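
With those variables exported, re-running the same pytest command shown earlier also executes the previously skipped pdsh tests:

```bash
# python3 -m pytest -p no:cacheprovider tests/
```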

## Generation of Unit tests

The main idea is the concept of **referential transparency** (see for example
[ref_transparency](https://stackoverflow.com/questions/210835/what-is-referential-transparency)). In the functional
programming paradigm it means that, given a function and an input value, you always receive the same output. The
test generator takes advantage of this, since the constructors of the Benchmark classes should always produce
instances with the same initial state. The Benchmark class in CBT expects as argument an object loaded from a
.yaml file (the test plan, which includes a Cluster type object). If we provide a fixed, minimal cluster object to
the constructor of the Benchmark class, we obtain an _invariant_ that we can use to test that each attribute of
the Benchmark classes has the same value across runs.

In other words, each constructor of a CBT Benchmark class behaves like a function: given the same fixed cluster
instance as argument, it always produces object instances initialised with the same values.

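A minimal sketch of this invariant is shown below (the helper names and the attribute filter are illustrative,
not the generator's actual code; the constructor signature follows benchmark/benchmark.py):

```python
# Sketch only: illustrates the determinism assumption the generator relies on.
# `fixed_cluster` and `fixed_config` stand for the fixed minimal cluster and
# test plan (e.g. loaded from tools/invariant.yaml); the names are illustrative.

def snapshot(instance):
    """Serialisable snapshot of an instance's simple attributes (as recorded in baseline.json)."""
    return {k: v for k, v in vars(instance).items()
            if isinstance(v, (str, int, float, bool, list, dict, type(None)))}

def constructor_is_referentially_transparent(benchmark_cls, fixed_cluster, fixed_config):
    """True when two constructions from the same fixed inputs yield the same initial state."""
    first = snapshot(benchmark_cls("/tmp/archive", fixed_cluster, fixed_config))
    second = snapshot(benchmark_cls("/tmp/archive", fixed_cluster, fixed_config))
    return first == second
```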

* For each supported Benchmark class, the tool constructs a serialisation of the object instance and saves it in
tools/baseline.json.
* To detect tampering, an md5sum of the contents of the .json file is calculated.
* For each supported Benchmark class, the tool uses a boilerplate code template to produce unit tests. Each unit test verifies that a supported attribute of the benchmark class is initialised as recorded in baseline.json.
* When executed, the unit tests first perform a sanity check to ensure that baseline.json has not changed since the
unit tests were generated, and then verify each attribute of each Benchmark class. This detects whether attributes
have been changed, replaced or deleted, which is especially useful for catching regressions during code
refactoring. (A sketch of this check is shown after the list.)

## Recommended workflow


* Before starting a code refactoring effort, run the unit tests: they should all pass as shown above.
* Make the intended code change -- for example, remove one of the benchmark class modules, add new attributes, or
delete some existing attributes.
* Run the unit tests: some should now fail, indicating attributes that existed previously but are missing from the
current benchmark class module.
* Run the tool serialise_benchmark.py. This will regenerate the baseline.json and the unit tests.
* Run the unit tests: they should now all pass.
* Iterate if required.
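
Condensed into commands, the loop above looks roughly as follows (reusing the invocations shown earlier):

```bash
# python3 -m pytest -p no:cacheprovider tests/   # 1. all tests pass before refactoring
# <make the intended code change>
# python3 -m pytest -p no:cacheprovider tests/   # 2. failures pinpoint changed or removed attributes
# python3 tools/serialise_benchmark.py           # 3. regenerate baseline.json and the unit tests
# python3 -m pytest -p no:cacheprovider tests/   # 4. all tests pass again
```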
Binary file added docs/cbt_utests_gen.png
Binary file added docs/cbt_utests_run.png
20 changes: 20 additions & 0 deletions settings.py
@@ -142,3 +142,23 @@ def uniquenodes(nodes):

def shutdown(message):
sys.exit(message)


def mock_initialize(config_file="tools/invariant.yaml"):
""" Auxiliary method only to be used from serialise_benchmarks.py"""
global common, cluster, client_endpoints, benchmarks, monitoring_profiles
config = {}
try:
with open(config_file) as f:
config = yaml.safe_load(f)
except IOError as e:
raise argparse.ArgumentTypeError(str(e))

common = config.get('common', {})
cluster = config.get('cluster', {})
client_endpoints = config.get('client_endpoints', {})
benchmarks = config.get('benchmarks', {})
monitoring_profiles = config.get('monitoring_profiles', dict(collectl={}))
# Set some defaults required
cluster['tmp_dir'] = '/tmp/cbt.XYZ'
cluster['osd_ra'] = '0'
31 changes: 11 additions & 20 deletions tests/test_benchmarkfactory.py
@@ -1,16 +1,15 @@
import unittest
""" Unit tests for the Benchmarkfactory class """

import unittest
import benchmarkfactory
from log_support import setup_loggers


class TestBenchmarkFactory(unittest.TestCase):
""" Sanity tests for Benchmarkfactory """
def test_permutations_1(self):
config = dict(
x=12,
y=True,
z={1: 2},
t=[1, 2, "4"]
)
""" Basic sanity permutations """
config = {"x": 12, "y": True, "z": {1: 2}, "t": [1, 2, 4]}
cfgs = list(benchmarkfactory.all_configs(config))
self.assertEqual(len(cfgs), 3)
self.assertEqual([dict] * 3, list(map(type, cfgs)))
@@ -24,14 +23,8 @@ def test_permutations_1(self):
self.assertEqual(sorted(tvals), sorted(config['t']))

def test_permutations_2(self):
config = dict(
x=12,
y=True,
z={1: 2},
t=[1, 2, "4"],
j=[7, True, "gg"]
)

""" Basic sanity permutations """
config = {"x": 12, "y": True, "z": {1: 2}, "t": [1, 2, 4], "j": [7, True, 'gg']}
cfgs = list(benchmarkfactory.all_configs(config))
self.assertEqual(len(cfgs), 9)
self.assertEqual([dict] * 9, list(map(type, cfgs)))
@@ -48,14 +41,12 @@ def test_permutations_2(self):
self.assertEqual(1, tjvals.count((tval, jval)))

def test_permutations_0(self):
config = dict(
x=12,
y=True,
z={1: 2},
)
""" Basic sanity permutations """
config = {"x": 12, "y": True, "z": {1: 2}}
cfgs = list(benchmarkfactory.all_configs(config))
self.assertEqual(len(cfgs), 1)
self.assertEqual(cfgs[0], config)

if __name__ == '__main__':
setup_loggers(log_fname='/tmp/cbt-utest.log')
unittest.main()