diff --git a/benchexec.py b/benchexec.py new file mode 100755 index 000000000..3e407c21b --- /dev/null +++ b/benchexec.py @@ -0,0 +1,43 @@ +#!/usr/bin/env python + +""" +CPAchecker is a tool for configurable software verification. +This file is part of CPAchecker. + +Copyright (C) 2007-2014 Dirk Beyer +All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + + +CPAchecker web page: + http://cpachecker.sosy-lab.org +""" + +# prepare for Python 3 +from __future__ import absolute_import, division, print_function, unicode_literals + +import sys +sys.dont_write_bytecode = True # prevent creation of .pyc files + +""" +Main script of BenchExec for executing a whole benchmark (suite). + +This script can be called from the command line. +For integrating from within Python instantiate the benchexec.BenchExec class +and either call "instance.start()" or "benchexec.main(instance)". +""" + +import benchexec + +benchexec.main(benchexec.BenchExec()) diff --git a/benchexec/__init__.py b/benchexec/__init__.py new file mode 100644 index 000000000..8eb625a81 --- /dev/null +++ b/benchexec/__init__.py @@ -0,0 +1,298 @@ +""" +CPAchecker is a tool for configurable software verification. +This file is part of CPAchecker. + +Copyright (C) 2007-2014 Dirk Beyer +All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + + +CPAchecker web page: + http://cpachecker.sosy-lab.org +""" + +# prepare for Python 3 +from __future__ import absolute_import, division, print_function, unicode_literals + +import logging +import argparse +import os +import signal +import sys +import time + +from .model import Benchmark +from . import util as util +from .outputhandler import OutputHandler + + +""" +Main module of BenchExec for executing a whole benchmark (suite). +To use it, instantiate the benchexec.BenchExec class +and either call "instance.start()" or "benchexec.main(instance)". 
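For illustration, the embedding described in the docstring above can be exercised roughly as follows; a minimal sketch, where "my-benchmark.xml" is a hypothetical benchmark-definition file:

```python
# Minimal embedding sketch (the file name is made up for illustration).
import benchexec

instance = benchexec.BenchExec()
# main() installs signal handling, runs instance.start(argv),
# and terminates the interpreter via sys.exit():
benchexec.main(instance, ["benchexec", "my-benchmark.xml"])
```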
+ +Naming conventions used within BenchExec: + +TOOL: a (verification) tool that should be executed +EXECUTABLE: the executable file that should be called for running a TOOL +SOURCEFILE: one input file for the TOOL +RUN: one execution of a TOOL on one SOURCEFILE +RUNSET: a set of RUNs of one TOOL with at most one RUN per SOURCEFILE +RUNDEFINITION: a template for the creation of a RUNSET with RUNS from one or more SOURCEFILESETs +BENCHMARK: a list of RUNDEFINITIONs and SOURCEFILESETs for one TOOL +OPTION: a user-specified option to add to the command-line of the TOOL when it its run +CONFIG: the configuration of this script consisting of the command-line arguments given by the user +EXECUTOR: a module for executing a BENCHMARK + +"run" always denotes a job to do and is never used as a verb. +"execute" is only used as a verb (this is what is done with a run). +A benchmark or a run set can also be executed, which means to execute all contained runs. + +Variables ending with "file" contain filenames. +Variables ending with "tag" contain references to XML tag objects created by the XML parser. +""" + +class BenchExec(object): + """ + The main class of BenchExec. + It is designed to be extended by inheritance, and for example + allows configuration options to be added and the executor to be replaced. + By default, it uses an executor that executes all runs on the local machine. + """ + + def __init__(self): + self.executor = None + self.stopped_by_interrupt = False + + def start(self, argv): + """ + Start BenchExec. + @param argv: command-line options for BenchExec + """ + parser = self.create_argument_parser() + self.config = parser.parse_args(argv[1:]) + + for arg in self.config.files: + if not os.path.exists(arg) or not os.path.isfile(arg): + parser.error("File {0} does not exist.".format(repr(arg))) + + if os.path.isdir(self.config.output_path): + self.config.output_path = os.path.normpath(self.config.output_path) + os.sep + + self.setup_logging() + + self.executor = self.load_executor() + + returnCode = 0 + for arg in self.config.files: + if self.stopped_by_interrupt: break + logging.debug("Benchmark {0} is started.".format(repr(arg))) + rc = self.execute_benchmark(arg) + returnCode = returnCode or rc + logging.debug("Benchmark {0} is done.".format(repr(arg))) + + logging.debug("I think my job is done. Have a nice day!") + return returnCode + + + def create_argument_parser(self): + """ + Create a parser for the command-line options. + May be overwritten for adding more configuration options. + @return: an argparse.ArgumentParser instance + """ + parser = argparse.ArgumentParser(description= + """Run benchmarks with a (verification) tool. + Documented example files for the benchmark definitions + can be found as 'doc/examples/benchmark*.xml'. + Use the table-generator.py script to create nice tables + from the output of this script.""") + + parser.add_argument("files", nargs='+', metavar="FILE", + help="XML file with benchmark definition") + parser.add_argument("-d", "--debug", + action="store_true", + help="Enable debug output") + + parser.add_argument("-r", "--rundefinition", dest="selected_run_definitions", + action="append", + help="Run only the specified RUN_DEFINITION from the benchmark definition file. " + + "This option can be specified several times.", + metavar="RUN_DEFINITION") + + parser.add_argument("-s", "--sourcefiles", dest="selected_sourcefile_sets", + action="append", + help="Run only the files from the sourcefiles tag with SOURCE as name. 
" + + "This option can be specified several times.", + metavar="SOURCES") + + parser.add_argument("-n", "--name", + dest="name", default=None, + help="Set name of benchmark execution to NAME", + metavar="NAME") + + parser.add_argument("-o", "--outputpath", + dest="output_path", type=str, + default="./test/results/", + help="Output prefix for the generated results. " + + "If the path is a folder files are put into it," + + "otherwise it is used as a prefix for the resulting files.") + + parser.add_argument("-T", "--timelimit", + dest="timelimit", default=None, + help="Time limit in seconds for each run (-1 to disable)", + metavar="SECONDS") + + parser.add_argument("-M", "--memorylimit", + dest="memorylimit", default=None, + help="Memory limit in MB (-1 to disable)", + metavar="MB") + + parser.add_argument("-N", "--numOfThreads", + dest="num_of_threads", default=None, type=int, + help="Run n benchmarks in parallel", + metavar="n") + + parser.add_argument("-c", "--limitCores", dest="corelimit", + type=int, default=None, + metavar="N", + help="Limit each run of the tool to N CPU cores (-1 to disable).") + + parser.add_argument("--maxLogfileSize", + dest="maxLogfileSize", type=int, default=20, + metavar="MB", + help="Shrink logfiles to given size in MB, if they are too big. (-1 to disable, default value: 20 MB).") + + parser.add_argument("--commit", dest="commit", + action="store_true", + help="If the output path is a git repository without local changes, " + + "add and commit the result files.") + + parser.add_argument("--message", + dest="commit_message", type=str, + default="Results for benchmark run", + help="Commit message if --commit is used.") + + parser.add_argument("--startTime", + dest="start_time", + type=parse_time_arg, + default=None, + metavar="'YYYY-MM-DD hh:mm'", + help='Set the given date and time as the start time of the benchmark.') + + return parser + + + def setup_logging(self): + """ + Configure the logging framework. + """ + if self.config.debug: + logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s", + level=logging.DEBUG) + else: + logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s", + level=logging.INFO) + + + def load_executor(self): + """ + Create and return the executor module that should be used for benchmarking. + May be overridden for replacing the executor, + for example with an implementation that delegates to some cloud service. + """ + from . import localexecution as executor + return executor + + + def execute_benchmark(self, benchmark_file): + """ + Execute a single benchmark as defined in a file. + If called directly, ensure that config and executor attributes are set up. 
+ @param benchmark_file: the name of a benchmark-definition XML file + @return: a result value from the executor module + """ + benchmark = Benchmark(benchmark_file, self.config, + self.config.start_time or time.localtime()) + self.check_existing_results(benchmark) + + self.executor.init(self.config, benchmark) + output_handler = OutputHandler(benchmark, self.executor.get_system_info()) + + logging.debug("I'm benchmarking {0} consisting of {1} run sets.".format( + repr(benchmark_file), len(benchmark.run_sets))) + + result = self.executor.execute_benchmark(benchmark, output_handler) + + if self.config.commit and not self.stopped_by_interrupt: + util.add_files_to_git_repository(self.config.output_path, output_handler.all_created_files, + self.config.commit_message+'\n\n'+output_handler.description) + return result + + + def check_existing_results(self, benchmark): + """ + Check and abort if the target directory for the benchmark results + already exists in order to avoid overwriting results. + """ + if os.path.exists(benchmark.log_folder): + # we refuse to overwrite existing results + sys.exit('Output directory {0} already exists, will not overwrite existing results.'.format(benchmark.log_folder)) + + + def stop(self): + """ + Stop the execution of a benchmark. + This instance cannot be used anymore afterwards. + Timely termination is not guaranteed, and this method may return before + everything is terminated. + """ + self.stopped_by_interrupt = True + + if self.executor: + self.executor.stop() + + +def parse_time_arg(s): + """ + Parse a time stamp in the "year-month-day hour-minute" format. + """ + try: + return time.strptime(s, "%Y-%m-%d %H:%M") + except ValueError as e: + raise argparse.ArgumentTypeError(e) + + +def signal_handler_ignore(signum, frame): + """ + Log and ignore all signals. + """ + logging.warn('Received signal %d, ignoring it' % signum) + +def main(benchexec, argv=None): + """ + The main method of BenchExec for use in a command-line script. + In addition to calling benchexec.start(argv), + it also handles signals and keyboard interrupts. + It does not return but calls sys.exit(). + @param benchexec: An instance of BenchExec for executing benchmarks. + @param argv: optionally the list of command-line options to use + """ + # ignore SIGTERM + signal.signal(signal.SIGTERM, signal_handler_ignore) + try: + sys.exit(benchexec.start(argv or sys.argv)) + except KeyboardInterrupt: # this block is reached, when interrupt is thrown before or after a run set execution + benchexec.stop() + util.printOut("\n\nScript was interrupted by user, some runs may not be done.") diff --git a/benchexec/cgroups.py b/benchexec/cgroups.py new file mode 100644 index 000000000..aba7490c2 --- /dev/null +++ b/benchexec/cgroups.py @@ -0,0 +1,213 @@ +""" +CPAchecker is a tool for configurable software verification. +This file is part of CPAchecker. + +Copyright (C) 2007-2014 Dirk Beyer +All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
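The parse_time_arg() helper above is the type used for --startTime; a quick sketch of the accepted format and of how the resulting struct_time is later turned into the instance name in model.Benchmark (the date is an arbitrary example):

```python
import time

# Format accepted by --startTime, cf. parse_time_arg() above.
start = time.strptime("2014-05-01 13:30", "%Y-%m-%d %H:%M")

# model.Benchmark derives the instance name from this value:
print(time.strftime("%y-%m-%d_%H%M", start))  # -> 14-05-01_1330
```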
+ + +CPAchecker web page: + http://cpachecker.sosy-lab.org +""" + +# prepare for Python 3 +from __future__ import absolute_import, division, print_function, unicode_literals + +import logging +import os +import shutil +import signal +import tempfile +import time + +from . import util as util + +CGROUP_NAME_PREFIX='benchmark_' + +ALL_KNOWN_SUBSYSTEMS = set(['cpuacct', 'cpuset', 'freezer', 'memory']) + +def init_cgroup(cgroupsParents, subsystem): + """ + Initialize a cgroup subsystem. + Call this before calling any other methods from this module for this subsystem. + @param cgroupsParents: A dictionary with the cgroup mount points for each subsystem (filled by this method) + @param subsystem: The subsystem to initialize + """ + if not cgroupsParents: + # first call to this method + logging.debug('Analyzing /proc/mounts and /proc/self/cgroup for determining cgroups.') + mounts = _find_cgroup_mounts() + cgroups = _find_own_cgroups() + + for mountedSubsystem, mount in mounts.items(): + cgroupsParents[mountedSubsystem] = os.path.join(mount, cgroups[mountedSubsystem]) + + if not subsystem in cgroupsParents: + logging.warning( +'''Cgroup subsystem {0} not enabled. +Please enable it with "sudo mount -t cgroup none /sys/fs/cgroup".''' + .format(subsystem) + ) + cgroupsParents[subsystem] = None + return + + cgroup = cgroupsParents[subsystem] + logging.debug('My cgroup for subsystem {0} is {1}'.format(subsystem, cgroup)) + + try: # only for testing? + testCgroup = create_cgroup(cgroupsParents, subsystem)[subsystem] + remove_cgroup(testCgroup) + except OSError as e: + logging.warning( +'''Cannot use cgroup hierarchy mounted at {0}, reason: {1} +If permissions are wrong, please run "sudo chmod o+wt \'{0}\'".''' + .format(cgroup, e.strerror)) + cgroupsParents[subsystem] = None + + +def _find_cgroup_mounts(): + mounts = {} + try: + with open('/proc/mounts', 'rt') as mountsFile: + for mount in mountsFile: + mount = mount.split(' ') + if mount[2] == 'cgroup': + mountpoint = mount[1] + options = mount[3] + for option in options.split(','): + if option in ALL_KNOWN_SUBSYSTEMS: + mounts[option] = mountpoint + except IOError as e: + logging.exception('Cannot read /proc/mounts') + return mounts + + +def _find_own_cgroups(): + """ + Given a cgroup subsystem, + find the cgroup in which this process is in. + (Each process is in exactly cgroup in each hierarchy.) + @return the path to the cgroup inside the hierarchy + """ + ownCgroups = {} + try: + with open('/proc/self/cgroup', 'rt') as ownCgroupsFile: + for ownCgroup in ownCgroupsFile: + #each line is "id:subsystem,subsystem:path" + ownCgroup = ownCgroup.strip().split(':') + path = ownCgroup[2][1:] # remove leading / + for subsystem in ownCgroup[1].split(','): + ownCgroups[subsystem] = path + except IOError as e: + logging.exception('Cannot read /proc/self/cgroup') + return ownCgroups + + +def create_cgroup(cgroupsParents, *subsystems): + """ + Try to create a cgroup for each of the given subsystems. + If multiple subsystems are available in the same hierarchy, + a common cgroup for theses subsystems is used. 
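A standalone sketch of the /proc/self/cgroup parsing done by _find_own_cgroups() above; the sample line is made up, the format is "id:subsystem,subsystem:path" as noted in the code:

```python
# Made-up sample line in the format parsed by _find_own_cgroups().
line = "4:cpuacct,cpuset:/user/1000"

own_cgroups = {}
entry = line.strip().split(':')
path = entry[2][1:]  # remove leading '/'
for subsystem in entry[1].split(','):
    own_cgroups[subsystem] = path

assert own_cgroups == {'cpuacct': 'user/1000', 'cpuset': 'user/1000'}
```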
+    @param subsystems: a list of cgroup subsystems
+    @return a map from subsystem to cgroup for each subsystem where it was possible to create a cgroup
+    """
+    createdCgroupsPerSubsystem = {}
+    createdCgroupsPerParent = {}
+    for subsystem in subsystems:
+        if subsystem not in cgroupsParents:
+            init_cgroup(cgroupsParents, subsystem)
+
+        parentCgroup = cgroupsParents.get(subsystem)
+        if not parentCgroup:
+            # subsystem not enabled
+            continue
+        if parentCgroup in createdCgroupsPerParent:
+            # reuse already created cgroup
+            createdCgroupsPerSubsystem[subsystem] = createdCgroupsPerParent[parentCgroup]
+            continue
+
+        cgroup = tempfile.mkdtemp(prefix=CGROUP_NAME_PREFIX, dir=parentCgroup)
+        createdCgroupsPerSubsystem[subsystem] = cgroup
+        createdCgroupsPerParent[parentCgroup] = cgroup
+
+        # add allowed cpus and memory to cgroup if necessary
+        # (otherwise we can't add any tasks)
+        try:
+            shutil.copyfile(os.path.join(parentCgroup, 'cpuset.cpus'), os.path.join(cgroup, 'cpuset.cpus'))
+            shutil.copyfile(os.path.join(parentCgroup, 'cpuset.mems'), os.path.join(cgroup, 'cpuset.mems'))
+        except IOError:
+            # expected to fail if cpuset subsystem is not enabled in this hierarchy
+            pass
+
+    return createdCgroupsPerSubsystem
+
+def add_task_to_cgroup(cgroup, pid):
+    if cgroup:
+        with open(os.path.join(cgroup, 'tasks'), 'w') as tasksFile:
+            tasksFile.write(str(pid))
+
+
+def kill_all_tasks_in_cgroup_recursively(cgroup):
+    """
+    Iterate through a cgroup and all its children cgroups
+    and kill all processes in any of these cgroups forcefully.
+    Additionally, the children cgroups will be deleted.
+    """
+    files = [os.path.join(cgroup, f) for f in os.listdir(cgroup)]
+    subdirs = filter(os.path.isdir, files)
+
+    for subCgroup in subdirs:
+        kill_all_tasks_in_cgroup_recursively(subCgroup)
+        remove_cgroup(subCgroup)
+
+    kill_all_tasks_in_cgroup(cgroup)
+
+
+def kill_all_tasks_in_cgroup(cgroup):
+    tasksFile = os.path.join(cgroup, 'tasks')
+
+    i = 0
+    while True:
+        i += 1
+        for sig in [signal.SIGINT, signal.SIGTERM, signal.SIGKILL]:
+            with open(tasksFile, 'rt') as tasks:
+                task = None
+                for task in tasks:
+                    task = task.strip()
+                    if i > 1:
+                        logging.warning('Run has left-over process with pid {0} in cgroup {1}, sending signal {2} (try {3}).'.format(task, cgroup, sig, i))
+                    util.kill_process(int(task), sig)
+
+                if task is None:
+                    return # No process was hanging, exit
+
+            time.sleep(i * 0.5) # wait for the process to exit, this might take some time
+
+
+def remove_cgroup(cgroup):
+    if cgroup:
+        if not os.path.exists(cgroup):
+            logging.warning('Cannot remove CGroup {0}, because it does not exist.'.format(cgroup))
+            return
+        assert os.path.getsize(os.path.join(cgroup, 'tasks')) == 0
+        try:
+            os.rmdir(cgroup)
+        except OSError:
+            # sometimes this fails because the cgroup is still busy, we try again once
+            try:
+                os.rmdir(cgroup)
+            except OSError as e:
+                logging.warning("Failed to remove cgroup {0}: error {1} ({2})"
+                                .format(cgroup, e.errno, e.strerror))
diff --git a/benchexec/filewriter.py b/benchexec/filewriter.py
new file mode 100644
index 000000000..2faf5af42
--- /dev/null
+++ b/benchexec/filewriter.py
@@ -0,0 +1,79 @@
+"""
+CPAchecker is a tool for configurable software verification.
+This file is part of CPAchecker.
+
+Copyright (C) 2007-2014 Dirk Beyer
+All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
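kill_all_tasks_in_cgroup() above escalates from SIGINT over SIGTERM to SIGKILL and retries with growing sleep times; a condensed sketch of that pattern, assuming a plain list of PIDs instead of a cgroup tasks file:

```python
import os
import signal
import time

def kill_processes(pids):
    # Escalating kill as in kill_all_tasks_in_cgroup(), but for a
    # plain PID list (sketch only, not the cgroup-based original).
    for i, sig in enumerate([signal.SIGINT, signal.SIGTERM, signal.SIGKILL], 1):
        remaining = []
        for pid in pids:
            try:
                os.kill(pid, sig)
                remaining.append(pid)
            except OSError:
                pass  # process is already gone
        if not remaining:
            return
        pids = remaining
        time.sleep(i * 0.5)  # give the processes time to exit
```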
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + + +CPAchecker web page: + http://cpachecker.sosy-lab.org +""" + +# prepare for Python 3 +from __future__ import absolute_import, division, print_function, unicode_literals + +import os +from . import util + +class FileWriter: + """ + The class FileWriter is a wrapper for writing content into a file. + """ + + def __init__(self, filename, content): + """ + The constructor of FileWriter creates the file. + If the file exist, it will be OVERWRITTEN without a message! + """ + + self.__filename = filename + self.__needsRewrite = False + self.__content = content + + # Open file with "w" at least once so it will be overwritten. + util.write_file(content, self.__filename) + + def append(self, newContent, keep=True): + """ + Add content to the represented file. + If keep is False, the new content will be forgotten during the next call + to this method. + """ + content = self.__content + newContent + if keep: + self.__content = content + + if self.__needsRewrite: + """ + Replace the content of the file. + A temporary file is used to avoid loss of data through an interrupt. + """ + tmpFilename = self.__filename + ".tmp" + + util.write_file(content, tmpFilename) + + os.rename(tmpFilename, self.__filename) + else: + with open(self.__filename, "a") as file: + file.write(newContent) + + self.__needsRewrite = not keep + + def replace(self, newContent): + # clear and append + self.__content = '' + self.__needsRewrite = True + self.append(newContent) diff --git a/benchexec/localexecution.py b/benchexec/localexecution.py new file mode 100644 index 000000000..f027b0b61 --- /dev/null +++ b/benchexec/localexecution.py @@ -0,0 +1,539 @@ +""" +CPAchecker is a tool for configurable software verification. +This file is part of CPAchecker. + +Copyright (C) 2007-2014 Dirk Beyer +All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + + +CPAchecker web page: + http://cpachecker.sosy-lab.org +""" + +# prepare for Python 3 +from __future__ import absolute_import, division, print_function, unicode_literals + +import sys +sys.dont_write_bytecode = True # prevent creation of .pyc files + +try: + import Queue +except ImportError: # Queue was renamed to queue in Python 3 + import queue as Queue + +import collections +import itertools +import logging +import math +import os +import re +import resource +import subprocess +import threading +import time + +from .model import CORELIMIT, MEMLIMIT, TIMELIMIT, SOFTTIMELIMIT +from . import cgroups +from .runexecutor import RunExecutor +from .systeminfo import SystemInfo +from . 
import util as util + + +WORKER_THREADS = [] +STOPPED_BY_INTERRUPT = False + +_BYTE_FACTOR = 1000 # byte in kilobyte + +_TURBO_BOOST_FILE = "/sys/devices/system/cpu/cpufreq/boost" +_TURBO_BOOST_FILE_PSTATE = "/sys/devices/system/cpu/intel_pstate/no_turbo" + +def init(config, benchmark): + benchmark.executable = benchmark.tool.executable() + benchmark.tool_version = benchmark.tool.version(benchmark.executable) + + try: + processes = subprocess.Popen(['ps', '-eo', 'cmd'], stdout=subprocess.PIPE).communicate()[0] + if len(re.findall("python.*benchmark\.py", util.decode_to_string(processes))) > 1: + logging.warn("Already running instance of this script detected. " + \ + "Please make sure to not interfere with somebody else's benchmarks.") + except OSError: + pass # this does not work on Windows + +def get_system_info(): + return SystemInfo() + +def execute_benchmark(benchmark, output_handler): + + run_sets_executed = 0 + + logging.debug("I will use {0} threads.".format(benchmark.num_of_threads)) + + cgroupsParents = {} + cgroups.init_cgroup(cgroupsParents, 'cpuset') + cgroupCpuset = cgroupsParents['cpuset'] + + coreAssignment = None # cores per run + memoryAssignment = None # memory banks per run + if CORELIMIT in benchmark.rlimits: + if not cgroupCpuset: + sys.exit("Cannot limit the number of CPU cores/memory nodes without cpuset cgroup.") + coreAssignment = _get_cpu_cores_per_run(benchmark.rlimits[CORELIMIT], benchmark.num_of_threads, cgroupCpuset) + memoryAssignment = _get_memory_banks_per_run(coreAssignment, cgroupCpuset) + + if MEMLIMIT in benchmark.rlimits: + # check whether we have enough memory in the used memory banks for all runs + memLimit = benchmark.rlimits[MEMLIMIT] * _BYTE_FACTOR * _BYTE_FACTOR # MB to Byte + _check_memory_size(memLimit, benchmark.num_of_threads, memoryAssignment, cgroupsParents) + + if benchmark.num_of_threads > 1 and _is_turbo_boost_enabled(): + logging.warning("Turbo boost of CPU is enabled. 
Starting more than one benchmark in parallel affects the CPU frequency and thus makes the performance unreliable.") + + # iterate over run sets + for runSet in benchmark.run_sets: + + if STOPPED_BY_INTERRUPT: break + + if not runSet.should_be_executed(): + output_handler.output_for_skipping_run_set(runSet) + + elif not runSet.runs: + output_handler.output_for_skipping_run_set(runSet, "because it has no files") + + else: + run_sets_executed += 1 + # get times before runSet + ruBefore = resource.getrusage(resource.RUSAGE_CHILDREN) + walltime_before = time.time() + energyBefore = util.measure_energy() + + output_handler.output_before_run_set(runSet) + + # put all runs into a queue + for run in runSet.runs: + _Worker.working_queue.put(run) + + # create some workers + for i in range(benchmark.num_of_threads): + cores = coreAssignment[i] if coreAssignment else None + memBanks = memoryAssignment[i] if memoryAssignment else None + WORKER_THREADS.append(_Worker(benchmark, cores, memBanks, output_handler)) + + # wait until all tasks are done, + # instead of queue.join(), we use a loop and sleep(1) to handle KeyboardInterrupt + finished = False + while not finished and not STOPPED_BY_INTERRUPT: + try: + _Worker.working_queue.all_tasks_done.acquire() + finished = (_Worker.working_queue.unfinished_tasks == 0) + finally: + _Worker.working_queue.all_tasks_done.release() + + try: + time.sleep(0.1) # sleep some time + except KeyboardInterrupt: + stop() + + # get times after runSet + walltime_after = time.time() + energy = util.measure_energy(energyBefore) + usedWallTime = walltime_after - walltime_before + ruAfter = resource.getrusage(resource.RUSAGE_CHILDREN) + usedCpuTime = (ruAfter.ru_utime + ruAfter.ru_stime) \ + - (ruBefore.ru_utime + ruBefore.ru_stime) + + if STOPPED_BY_INTERRUPT: + output_handler.set_error('interrupted') + output_handler.output_after_run_set(runSet, cputime=usedCpuTime, walltime=usedWallTime, energy=energy) + + output_handler.output_after_benchmark(STOPPED_BY_INTERRUPT) + + +def stop(): + global STOPPED_BY_INTERRUPT + STOPPED_BY_INTERRUPT = True + + # kill running jobs + util.printOut("killing subprocesses...") + for worker in WORKER_THREADS: + worker.stop() + + # wait until all threads are stopped + for worker in WORKER_THREADS: + worker.join() + + +def _get_cpu_cores_per_run(coreLimit, num_of_threads, cgroupCpuset): + """ + Calculate an assignment of the available CPU cores to a number + of parallel benchmark executions such that each run gets its own cores + without overlapping of cores between runs. + In case the machine has hyper-threading, this method tries to avoid + putting two different runs on the same physical core + (but it does not guarantee this if the number of parallel runs is too high to avoid it). + In case the machine has multiple CPUs, this method avoids + splitting a run across multiple CPUs if the number of cores per run + is lower than the number of cores per CPU + (splitting a run over multiple CPUs provides worse performance). + It will also try to split the runs evenly across all available CPUs. + + A few theoretically-possible cases are not implemented, + for example assigning three 10-core runs on a machine + with two 16-core CPUs (this would have unfair core assignment + and thus undesirable performance characteristics anyway). + + The list of available cores is read from the cgroup filesystem, + such that the assigned cores are a subset of the cores + that the current process is allowed to use. 
+    This script currently does not support situations
+    where the available cores are split asymmetrically over CPUs,
+    e.g. 3 cores on one CPU and 5 on another.
+
+    @param coreLimit: the number of cores for each run
+    @param num_of_threads: the number of parallel benchmark executions
+    @return a list of lists, where each inner list contains the cores for one run
+    """
+    try:
+        # read list of available CPU cores
+        allCpus = util.parse_int_list(util.read_file(cgroupCpuset, 'cpuset.cpus'))
+        logging.debug("List of available CPU cores is {0}.".format(allCpus))
+
+        # read mapping of core to CPU ("physical package")
+        physical_packages = map(lambda core: int(util.read_file('/sys/devices/system/cpu/cpu{0}/topology/physical_package_id'.format(core))), allCpus)
+        cores_of_package = collections.defaultdict(list)
+        for core, package in zip(allCpus, physical_packages):
+            cores_of_package[package].append(core)
+        logging.debug("Physical packages of cores are {0}.".format(str(cores_of_package)))
+
+        # read hyper-threading information (sibling cores sharing the same physical core)
+        siblings_of_core = {}
+        for core in allCpus:
+            siblings = util.parse_int_list(util.read_file('/sys/devices/system/cpu/cpu{0}/topology/thread_siblings_list'.format(core)))
+            siblings_of_core[core] = siblings
+        logging.debug("Siblings of cores are {0}.".format(str(siblings_of_core)))
+    except ValueError as e:
+        sys.exit("Could not read CPU information from kernel: {0}".format(e))
+
+    return _get_cpu_cores_per_run0(coreLimit, num_of_threads, allCpus, cores_of_package, siblings_of_core)
+
+def _get_cpu_cores_per_run0(coreLimit, num_of_threads, allCpus, cores_of_package, siblings_of_core):
+    """This method does the actual work of _get_cpu_cores_per_run
+    without reading the machine architecture from the filesystem
+    in order to be testable. For a description, cf. above.
+    Note that this method might change the input parameters!
+    Do not call it directly, call _get_cpu_cores_per_run()!
+    @param allCpus: the list of all available cores
+    @param cores_of_package: a mapping from package (CPU) ids to lists of cores that belong to this CPU
+    @param siblings_of_core: a mapping from each core to a list of sibling cores including the core itself (a sibling is a core sharing the same physical core)
+    """
+    # First, do some checks whether this algorithm has a chance to work.
+    if coreLimit > len(allCpus):
+        sys.exit("Cannot run benchmarks with {0} CPU cores, only {1} CPU cores available.".format(coreLimit, len(allCpus)))
+    if coreLimit * num_of_threads > len(allCpus):
+        sys.exit("Cannot run {0} benchmarks in parallel with {1} CPU cores each, only {2} CPU cores available. Please reduce the number of threads to {3}.".format(num_of_threads, coreLimit, len(allCpus), len(allCpus) // coreLimit))
+
+    package_size = None # Number of cores per package
+    for package, cores in cores_of_package.items():
+        if package_size is None:
+            package_size = len(cores)
+        elif package_size != len(cores):
+            sys.exit("Asymmetric machine architecture not supported: CPU package {0} has {1} cores, but another package has {2} cores.".format(package, len(cores), package_size))
+
+    core_size = None # Number of threads per core
+    for core, siblings in siblings_of_core.items():
+        if core_size is None:
+            core_size = len(siblings)
+        elif core_size != len(siblings):
+            sys.exit("Asymmetric machine architecture not supported: CPU core {0} has {1} siblings, but another core has {2} siblings.".format(core, len(siblings), core_size))
+
+    # Second, compute some values we will need.
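Before the values computed below: the rounding pads each run's core count up to a full multiple of the hyper-threading sibling count, so that no physical core is shared between two runs; a worked example with made-up numbers:

```python
import math

core_size = 2   # hardware threads per physical core (example)
coreLimit = 3   # requested cores per run (example)

# Rounding as done below: 3 cores get padded to 2 full physical cores.
coreLimit_rounded_up = int(math.ceil(coreLimit / core_size) * core_size)
assert coreLimit_rounded_up == 4
```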
+    package_count = len(cores_of_package)
+
+    coreLimit_rounded_up = int(math.ceil(coreLimit / core_size) * core_size)
+    assert coreLimit <= coreLimit_rounded_up < (coreLimit + core_size)
+    #assert coreLimit_rounded_up <= package_size
+    if coreLimit_rounded_up * num_of_threads > len(allCpus):
+        logging.warning("The number of threads is too high and hyper-threading sibling cores need to be split among different runs, which makes benchmarking unreliable. Please reduce the number of threads to {0}.".format(len(allCpus) // coreLimit_rounded_up))
+
+    packages_per_run = int(math.ceil(coreLimit_rounded_up / package_size))
+    if packages_per_run > 1 and packages_per_run * num_of_threads > package_count:
+        sys.exit("Cannot split runs over multiple CPUs and at the same time assign multiple runs to the same CPU. Please reduce the number of threads to {0}.".format(package_count // packages_per_run))
+
+    runs_per_package = int(math.ceil(num_of_threads / package_count))
+    assert packages_per_run == 1 or runs_per_package == 1
+    if packages_per_run == 1 and runs_per_package * coreLimit > package_size:
+        sys.exit("Cannot run {} benchmarks with {} cores on {} CPUs with {} cores, because runs would need to be split across multiple CPUs. Please reduce the number of threads.".format(num_of_threads, coreLimit, package_count, package_size))
+
+    logging.debug("Going to assign at most {0} runs per package, each one using {1} cores and blocking {2} cores on {3} packages.".format(runs_per_package, coreLimit, coreLimit_rounded_up, packages_per_run))
+
+    # Third, do the actual core assignment.
+    result = []
+    used_cores = set()
+    for run in range(num_of_threads):
+        # this calculation ensures that runs are split evenly across packages
+        start_package = (run * packages_per_run) % package_count
+        cores = []
+        for package in range(start_package, start_package + packages_per_run):
+            for core in cores_of_package[package]:
+                if core not in cores:
+                    cores.extend(c for c in siblings_of_core[core] if c not in used_cores)
+                if len(cores) >= coreLimit:
+                    break
+            cores = cores[:coreLimit] # shrink if we got more cores than necessary
+            # remove used cores such that we do not try to use them again
+            cores_of_package[package] = [core for core in cores_of_package[package] if core not in cores]
+
+        assert len(cores) == coreLimit, "Wrong number of cores for run {} - previous results: {}, remaining cores per package: {}, current cores: {}".format(run, result, cores_of_package, cores)
+        used_cores.update(cores)
+        result.append(sorted(cores))
+
+    assert len(result) == num_of_threads
+    assert all(len(cores) == coreLimit for cores in result)
+    assert len(set(itertools.chain(*result))) == num_of_threads * coreLimit, "Cores are not uniquely assigned to runs: " + str(result)
+
+    logging.debug("Final core assignment: {0}.".format(str(result)))
+    return result
+
+
+def _get_memory_banks_per_run(coreAssignment, cgroupCpuset):
+    """Get an assignment of memory banks to runs that fits to the given coreAssignment,
+    i.e., no run is allowed to use memory that is not local (on the same NUMA node)
+    to one of its CPU cores."""
+    try:
+        # read list of available memory banks
+        allMems = set(_get_allowed_memory_banks(cgroupCpuset))
+
+        result = []
+        for cores in coreAssignment:
+            mems = set()
+            for core in cores:
+                coreDir = '/sys/devices/system/cpu/cpu{0}/'.format(core)
+                mems.update(_get_memory_banks_listed_in_dir(coreDir))
+            allowedMems = sorted(mems.intersection(allMems))
+            logging.debug("Memory banks for cores {} are {}, of which we can use {}.".format(cores,
list(mems), allowedMems)) + + result.append(allowedMems) + + assert len(result) == len(coreAssignment) + + if any(result) and os.path.isdir('/sys/devices/system/node/'): + return result + else: + # All runs get the empty list of memory regions + # because this system has no NUMA support + return None + except ValueError as e: + sys.exit("Could not read memory information from kernel: {0}".format(e)) + + +def _get_allowed_memory_banks(cgroupCpuset): + """Get the list of all memory banks allowed by the given cgroup.""" + return util.parse_int_list(util.read_file(cgroupCpuset, 'cpuset.mems')) + +def _get_memory_banks_listed_in_dir(dir): + """Get all memory banks the kernel lists in a given directory. + Such a directory can be /sys/devices/system/node/ (contains all memory banks) + or /sys/devices/system/cpu/cpu*/ (contains all memory banks on the same NUMA node as that core).""" + # Such directories contain entries named "node" for each memory bank + return [int(entry[4:]) for entry in os.listdir(dir) if entry.startswith('node')] + + +def _check_memory_size(memLimit, num_of_threads, memoryAssignment, cgroupsParents): + """Check whether the desired amount of parallel benchmarks fits in the memory. + Implemented are checks for memory limits via cgroup controller "memory" and + memory bank restrictions via cgroup controller "cpuset", + as well as whether the system actually has enough memory installed. + @param memLimit: the memory limit in bytes per run + @param num_of_threads: the number of parallel benchmark executions + @param memoryAssignment: the allocation of memory banks to runs (if not present, all banks are assigned to all runs) + """ + try: + # Check amount of memory allowed via cgroups. + def check_limit(actualLimit): + if actualLimit < memLimit: + sys.exit("Cgroups allow only {} bytes of memory to be used, cannot execute runs with {} bytes of memory.".format(actualLimit, memLimit)) + elif actualLimit < memLimit * num_of_threads: + sys.exit("Cgroups allow only {} bytes of memory to be used, not enough for {} benchmarks with {} bytes each. Please reduce the number of threads".format(actualLimit, num_of_threads, memLimit)) + + if not os.path.isdir('/sys/devices/system/node/'): + logging.debug("System without NUMA support in Linux kernel, ignoring memory assignment.") + return + + cgroups.init_cgroup(cgroupsParents, 'memory') + cgroupMemory = cgroupsParents['memory'] + if cgroupMemory: + # We use the entries hierarchical_*_limit in memory.stat and not memory.*limit_in_bytes + # because the former may be lower if memory.use_hierarchy is enabled. + with open(os.path.join(cgroupMemory, 'memory.stat')) as f: + for line in f: + if line.startswith('hierarchical_memory_limit'): + check_limit(int(line.split()[1])) + elif line.startswith('hierarchical_memsw_limit'): + check_limit(int(line.split()[1])) + + # Get list of all memory banks, either from memory assignment or from system. 
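As an aside on the check below: the Counter-based accounting rejects configurations where several runs oversubscribe the same set of memory banks; a toy example with made-up sizes:

```python
import collections

memLimit = 4 * 10**9                # 4 GB per run (example)
memSizes = {0: 8 * 10**9}           # one NUMA bank with 8 GB (made up)
memoryAssignment = [[0], [0], [0]]  # three runs all on bank 0

usedMem = collections.Counter()
for mems_of_run in memoryAssignment:
    totalSize = sum(memSizes[mem] for mem in mems_of_run)
    usedMem[tuple(mems_of_run)] += memLimit
    if usedMem[tuple(mems_of_run)] > totalSize:
        print("banks {} oversubscribed".format(mems_of_run))  # third run fails
```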
+        if not memoryAssignment:
+            cgroups.init_cgroup(cgroupsParents, 'cpuset')
+            cgroupCpuset = cgroupsParents['cpuset']
+            if cgroupCpuset:
+                allMems = _get_allowed_memory_banks(cgroupCpuset)
+            else:
+                allMems = _get_memory_banks_listed_in_dir('/sys/devices/system/node/')
+            memoryAssignment = [allMems] * num_of_threads # "fake" memory assignment: all threads on all banks
+        else:
+            allMems = set(itertools.chain(*memoryAssignment))
+
+        memSizes = dict((mem, _get_memory_bank_size(mem)) for mem in allMems)
+    except ValueError as e:
+        sys.exit("Could not read memory information from kernel: {0}".format(e))
+
+    # Check whether enough memory is allocatable on the assigned memory banks.
+    # As the sum of the sizes of the memory banks is at most the total size of memory in the system,
+    # and we do this check always even if the banks are not restricted,
+    # this also checks whether the system has actually enough memory installed.
+    usedMem = collections.Counter()
+    for mems_of_run in memoryAssignment:
+        totalSize = sum(memSizes[mem] for mem in mems_of_run)
+        if totalSize < memLimit:
+            sys.exit("Memory banks {} do not have enough memory for one run, only {} bytes available.".format(mems_of_run, totalSize))
+        usedMem[tuple(mems_of_run)] += memLimit
+        if usedMem[tuple(mems_of_run)] > totalSize:
+            sys.exit("Memory banks {} do not have enough memory for all runs, only {} bytes available. Please reduce the number of threads.".format(mems_of_run, totalSize))
+
+def _get_memory_bank_size(memBank):
+    """Get the size of a memory bank in bytes."""
+    fileName = '/sys/devices/system/node/node{0}/meminfo'.format(memBank)
+    size = None
+    with open(fileName) as f:
+        for line in f:
+            if 'MemTotal' in line:
+                size = line.split(':')[1].strip()
+                if size[-3:] != ' kB':
+                    raise ValueError('"{}" in file {} is not a memory size.'.format(size, fileName))
+                size = int(size[:-3]) * 1024 # kB to Byte
+                logging.debug("Memory bank {} has size {} bytes.".format(memBank, size))
+                return size
+    raise ValueError('Failed to read total memory from {}.'.format(fileName))
+
+
+def _is_turbo_boost_enabled():
+    try:
+        if os.path.exists(_TURBO_BOOST_FILE):
+            boost_enabled = int(util.read_file(_TURBO_BOOST_FILE))
+            if not (0 <= boost_enabled <= 1):
+                raise ValueError('Invalid value {} for turbo boost activation'.format(boost_enabled))
+            return boost_enabled != 0
+        if os.path.exists(_TURBO_BOOST_FILE_PSTATE):
+            boost_disabled = int(util.read_file(_TURBO_BOOST_FILE_PSTATE))
+            if not (0 <= boost_disabled <= 1):
+                raise ValueError('Invalid value {} for turbo boost activation'.format(boost_disabled))
+            return boost_disabled != 1
+    except ValueError as e:
+        sys.exit("Could not read turbo-boost information from kernel: {0}".format(e))
+
+
+class _Worker(threading.Thread):
+    """
+    A Worker is a daemonic thread that takes jobs from the working_queue and runs them.
+ """ + working_queue = Queue.Queue() + + def __init__(self, benchmark, my_cpus, my_memory_nodes, output_handler): + threading.Thread.__init__(self) # constuctor of superclass + self.benchmark = benchmark + self.my_cpus = my_cpus + self.my_memory_nodes = my_memory_nodes + self.output_handler = output_handler + self.run_executor = RunExecutor() + self.setDaemon(True) + + self.start() + + + def run(self): + while not _Worker.working_queue.empty() and not STOPPED_BY_INTERRUPT: + currentRun = _Worker.working_queue.get_nowait() + try: + self.execute(currentRun) + except SystemExit as e: + logging.critical(e) + except BaseException as e: + logging.exception('Exception during run execution') + _Worker.working_queue.task_done() + + + def execute(self, run): + """ + This function executes the tool with a sourcefile with options. + It also calls functions for output before and after the run. + """ + self.output_handler.output_before_run(run) + benchmark = self.benchmark + + memlimit = None + if MEMLIMIT in benchmark.rlimits: + memlimit = benchmark.rlimits[MEMLIMIT] * _BYTE_FACTOR * _BYTE_FACTOR # MB to Byte + + maxLogfileSize = benchmark.config.maxLogfileSize + if maxLogfileSize: + maxLogfileSize *= _BYTE_FACTOR * _BYTE_FACTOR # MB to Byte + elif maxLogfileSize == -1: + maxLogfileSize = None + + result = \ + self.run_executor.execute_run( + run.cmdline(), run.log_file, + hardtimelimit=benchmark.rlimits.get(TIMELIMIT), + softtimelimit=benchmark.rlimits.get(SOFTTIMELIMIT), + cores=self.my_cpus, + memory_nodes=self.my_memory_nodes, + memlimit=memlimit, + environments=benchmark.environment(), + workingDir=benchmark.working_directory(), + maxLogfileSize=maxLogfileSize) + + for key, value in result.items(): + if key == 'walltime': + run.walltime == value + elif key == 'cputime': + run.cputime = value + elif key == 'memory': + run.values['memUsage'] = result['memory'] + elif key == 'energy': + for ekey, evalue in value.items(): + run.values['energy-'+ekey] = evalue + else: + run.values['@' + key] = value + + if self.run_executor.PROCESS_KILLED: + # If the run was interrupted, we ignore the result and cleanup. + run.walltime = 0 + run.cputime = 0 + try: + if benchmark.config.debug: + os.rename(run.log_file, run.log_file + ".killed") + else: + os.remove(run.log_file) + except OSError: + pass + return + + run.after_execution(result['exitcode']) + self.output_handler.output_after_run(run) + + + def stop(self): + # asynchronous call to runexecutor, + # the worker will stop asap, but not within this method. + self.run_executor.stop() diff --git a/benchexec/model.py b/benchexec/model.py new file mode 100644 index 000000000..5a3f69763 --- /dev/null +++ b/benchexec/model.py @@ -0,0 +1,706 @@ +""" +CPAchecker is a tool for configurable software verification. +This file is part of CPAchecker. + +Copyright (C) 2007-2014 Dirk Beyer +All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+ + +CPAchecker web page: + http://cpachecker.sosy-lab.org +""" + +# prepare for Python 3 +from __future__ import absolute_import, division, print_function, unicode_literals + +import logging +import os +import time +import xml.etree.ElementTree as ET +import sys + +from datetime import date + +from . import result +from . import util as util + +MEMLIMIT = "memlimit" +TIMELIMIT = "timelimit" +CORELIMIT = "cpuCores" + +SOFTTIMELIMIT = 'softtimelimit' +HARDTIMELIMIT = 'hardtimelimit' + +PROPERTY_TAG = "propertyfile" + +_BYTE_FACTOR = 1000 # byte in kilobyte + +def substitute_vars(oldList, runSet, sourcefile=None): + """ + This method replaces special substrings from a list of string + and return a new list. + """ + benchmark = runSet.benchmark + + # list with tuples (key, value): 'key' is replaced by 'value' + keyValueList = [('${benchmark_name}', benchmark.name), + ('${benchmark_date}', benchmark.instance), + ('${benchmark_path}', benchmark.base_dir or '.'), + ('${benchmark_path_abs}', os.path.abspath(benchmark.base_dir)), + ('${benchmark_file}', os.path.basename(benchmark.benchmark_file)), + ('${benchmark_file_abs}', os.path.abspath(os.path.basename(benchmark.benchmark_file))), + ('${logfile_path}', os.path.dirname(runSet.log_folder) or '.'), + ('${logfile_path_abs}', os.path.abspath(runSet.log_folder)), + ('${rundefinition_name}', runSet.real_name if runSet.real_name else ''), + ('${test_name}', runSet.real_name if runSet.real_name else '')] + + if sourcefile: + keyValueList.append(('${sourcefile_name}', os.path.basename(sourcefile))) + keyValueList.append(('${sourcefile_path}', os.path.dirname(sourcefile) or '.')) + keyValueList.append(('${sourcefile_path_abs}', os.path.dirname(os.path.abspath(sourcefile)))) + + # do not use keys twice + assert len(set((key for (key, value) in keyValueList))) == len(keyValueList) + + newList = [] + + for oldStr in oldList: + newStr = oldStr + for (key, value) in keyValueList: + newStr = newStr.replace(key, value) + if '${' in newStr: + logging.warn("a variable was not replaced in '{0}'".format(newStr)) + newList.append(newStr) + + return newList + + + +class Benchmark: + """ + The class Benchmark manages the import of source files, options, columns and + the tool from a benchmark_file. + This class represents the tag. + """ + + def __init__(self, benchmark_file, config, start_time): + """ + The constructor of Benchmark reads the source files, options, columns and the tool + from the XML in the benchmark_file.. + """ + logging.debug("I'm loading the benchmark {0}.".format(benchmark_file)) + + self.config = config + self.benchmark_file = benchmark_file + self.base_dir = os.path.dirname(self.benchmark_file) + + # get benchmark-name + self.name = os.path.basename(benchmark_file)[:-4] # remove ending ".xml" + if config.name: + self.name += "."+config.name + + self.start_time = start_time + self.instance = time.strftime("%y-%m-%d_%H%M", self.start_time) + + self.output_base_name = config.output_path + self.name + "." + self.instance + self.log_folder = self.output_base_name + ".logfiles" + os.path.sep + + # parse XML + rootTag = ET.ElementTree().parse(benchmark_file) + + # get tool + tool_name = rootTag.get('tool') + if not tool_name: + sys.exit('A tool needs to be specified in the benchmark definition file.') + toolModule = "benchexec.tools." + tool_name + try: + self.tool = __import__(toolModule, fromlist=['Tool']).Tool() + except ImportError as ie: + sys.exit('Unsupported tool "{0}" specified. 
ImportError: {1}'.format(tool_name, ie))
+        except AttributeError:
+            sys.exit('The module for "{0}" does not define the necessary class.'.format(tool_name))
+
+        self.tool_name = self.tool.name()
+        # will be set from the outside if necessary (may not be the case in SaaS environments)
+        self.tool_version = None
+        self.executable = None
+
+        logging.debug("The tool to be benchmarked is {0}.".format(str(self.tool_name)))
+
+        self.rlimits = {}
+        keys = list(rootTag.keys())
+        for limit in [MEMLIMIT, TIMELIMIT, CORELIMIT]:
+            if limit in keys:
+                self.rlimits[limit] = int(rootTag.get(limit))
+
+        # override limits from XML with values from command line
+        def override_limit(configVal, limit):
+            if configVal != None:
+                val = int(configVal)
+                if val == -1: # infinity
+                    if limit in self.rlimits:
+                        self.rlimits.pop(limit)
+                else:
+                    self.rlimits[limit] = val
+
+        override_limit(config.memorylimit, MEMLIMIT)
+        override_limit(config.timelimit, TIMELIMIT)
+        override_limit(config.corelimit, CORELIMIT)
+
+        if HARDTIMELIMIT in keys:
+            hardtimelimit = int(rootTag.get(HARDTIMELIMIT))
+            if TIMELIMIT in self.rlimits:
+                if hardtimelimit < self.rlimits[TIMELIMIT]:
+                    logging.warning('Hard timelimit %d is smaller than timelimit %d, ignoring the former.'
+                                    % (hardtimelimit, self.rlimits[TIMELIMIT]))
+                else:
+                    self.rlimits[SOFTTIMELIMIT] = self.rlimits[TIMELIMIT]
+                    self.rlimits[TIMELIMIT] = hardtimelimit
+            else:
+                self.rlimits[TIMELIMIT] = hardtimelimit
+
+        # get number of threads, default value is 1
+        self.num_of_threads = int(rootTag.get("threads")) if ("threads" in keys) else 1
+        if config.num_of_threads != None:
+            self.num_of_threads = config.num_of_threads
+        if self.num_of_threads < 1:
+            logging.error("At least ONE thread must be given!")
+            sys.exit()
+
+        # get global options and property_files
+        self.options = util.get_list_from_xml(rootTag)
+        self.property_files = util.get_list_from_xml(rootTag, tag=PROPERTY_TAG, attributes=[])
+
+        # get columns
+        self.columns = Benchmark.load_columns(rootTag.find("columns"))
+
+        # get global source files, they are used in all run sets
+        globalSourcefilesTags = rootTag.findall("sourcefiles")
+
+        # get required files
+        self._required_files = set()
+        for required_files_tag in rootTag.findall('requiredfiles'):
+            required_files = util.expand_filename_pattern(required_files_tag.text, self.base_dir)
+            if not required_files:
+                logging.warning('Pattern {0} in requiredfiles tag did not match any file.'.format(required_files_tag.text))
+            self._required_files = self._required_files.union(required_files)
+
+        # get requirements
+        self.requirements = Requirements(rootTag.findall("require"), self.rlimits, config)
+
+        self.result_files_pattern = None
+        resultFilesTags = rootTag.findall("resultfiles")
+        if resultFilesTags:
+            if len(resultFilesTags) > 1:
+                logging.warning("Benchmark file {0} has multiple <resultfiles> tags, ignoring all but the first.".format(benchmark_file))
+            self.result_files_pattern = resultFilesTags[0].text
+
+        # get benchmarks
+        self.run_sets = []
+        for (i, rundefinitionTag) in enumerate(rootTag.findall("rundefinition")):
+            self.run_sets.append(RunSet(rundefinitionTag, self, i+1, globalSourcefilesTags))
+
+        if not self.run_sets:
+            for (i, rundefinitionTag) in enumerate(rootTag.findall("test")):
+                self.run_sets.append(RunSet(rundefinitionTag, self, i+1, globalSourcefilesTags))
+            if self.run_sets:
+                logging.warning("Benchmark file {0} uses deprecated <test> tags. Please rename them to <rundefinition>.".format(benchmark_file))
+            else:
+                logging.warning("Benchmark file {0} specifies no runs to execute (no <rundefinition> tags found).".format(benchmark_file))
+
+        if not any(runSet.should_be_executed() for runSet in self.run_sets):
+            logging.warning("No runSet selected, nothing will be executed.")
+            if config.selected_run_definitions:
+                logging.warning("The selection {0} does not match any runSet of {1}".format(
+                    str(config.selected_run_definitions),
+                    str([runSet.real_name for runSet in self.run_sets])
+                ))
+
+
+    def required_files(self):
+        return self._required_files.union(self.tool.program_files(self.executable))
+
+
+    def add_required_file(self, filename=None):
+        if filename is not None:
+            self._required_files.add(filename)
+
+
+    def working_directory(self):
+        return self.tool.working_directory(self.executable)
+
+
+    def environment(self):
+        return self.tool.environment(self.executable)
+
+
+    @staticmethod
+    def load_columns(columnsTag):
+        """
+        @param columnsTag: the columnsTag from the XML file
+        @return: a list of Columns()
+        """
+
+        logging.debug("I'm loading some columns for the outputfile.")
+        columns = []
+        if columnsTag != None: # columnsTag is optional in XML file
+            for columnTag in columnsTag.findall("column"):
+                pattern = columnTag.text
+                title = columnTag.get("title", pattern)
+                number_of_digits = columnTag.get("numberOfDigits") # digits behind comma
+                column = Column(pattern, title, number_of_digits)
+                columns.append(column)
+                logging.debug('Column "{0}" with title "{1}" loaded from XML file.'
+                              .format(column.text, column.title))
+        return columns
+
+
+class RunSet:
+    """
+    The class RunSet manages the import of files and options of a run set.
+    """
+
+    def __init__(self, rundefinitionTag, benchmark, index, globalSourcefilesTags=[]):
+        """
+        The constructor of RunSet reads the run-set name and the source files from rundefinitionTag.
+        Source files can be included or excluded, and imported from a list of
+        names in another file. Wildcards and variables are expanded.
+        @param rundefinitionTag: a rundefinitionTag from the XML file
+        """
+
+        self.benchmark = benchmark
+
+        # get name of run set, name is optional, the result can be "None"
+        self.real_name = rundefinitionTag.get("name")
+
+        # index is the number of the run set
+        self.index = index
+
+        self.log_folder = benchmark.log_folder
+        if self.real_name:
+            self.log_folder += self.real_name + "."
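The substitution mechanism of substitute_vars() earlier in this file is plain string replacement over a key/value list; reduced to its core (keys from the list above, values made up):

```python
keyValueList = [('${benchmark_name}', 'example'),
                ('${benchmark_date}', '14-05-01_1330')]

options = ['-o', 'out/${benchmark_name}.${benchmark_date}.txt']
newOptions = []
for oldStr in options:
    for key, value in keyValueList:
        oldStr = oldStr.replace(key, value)
    newOptions.append(oldStr)

assert newOptions == ['-o', 'out/example.14-05-01_1330.txt']
```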
+ + # get all run-set-specific options from rundefinitionTag + self.options = benchmark.options + util.get_list_from_xml(rundefinitionTag) + self.property_files = benchmark.property_files + util.get_list_from_xml(rundefinitionTag, tag=PROPERTY_TAG, attributes=[]) + + # get run-set specific required files + required_files = set() + for required_files_tag in rundefinitionTag.findall('requiredfiles'): + thisRequiredFiles = util.expand_filename_pattern(required_files_tag.text, benchmark.base_dir) + if not thisRequiredFiles: + logging.warning('Pattern {0} in requiredfiles tag did not match any file.'.format(required_files_tag.text)) + required_files = required_files.union(thisRequiredFiles) + + # get all runs, a run contains one sourcefile with options + self.blocks = self.extract_runs_from_xml(globalSourcefilesTags + rundefinitionTag.findall("sourcefiles"), + required_files) + self.runs = [run for block in self.blocks for run in block.runs] + + names = [self.real_name] + if len(self.blocks) == 1: + # there is exactly one source-file set to run, append its name to run-set name + names.append(self.blocks[0].real_name) + self.name = '.'.join(filter(None, names)) + self.full_name = self.benchmark.name + (("." + self.name) if self.name else "") + + # Currently we store logfiles as "basename.log", + # so we cannot distinguish sourcefiles in different folder with same basename. + # For a 'local benchmark' this causes overriding of logfiles after reading them, + # so the result is correct, only the logfile is gone. + # For 'cloud-mode' the logfile is overridden before reading it, + # so the result will be wrong and every measured value will be missing. + if self.should_be_executed(): + sourcefilesSet = set() + for run in self.runs: + base = os.path.basename(run.identifier) + if base in sourcefilesSet: + logging.warning("sourcefile with basename '" + base + + "' appears twice in runset. This could cause problems with equal logfile-names.") + else: + sourcefilesSet.add(base) + del sourcefilesSet + + + def should_be_executed(self): + return not self.benchmark.config.selected_run_definitions \ + or self.real_name in self.benchmark.config.selected_run_definitions + + + def extract_runs_from_xml(self, sourcefilesTagList, globalRequiredFiles): + ''' + This function builds a list of SourcefileSets (containing filename with options). + The files and their options are taken from the list of sourcefilesTags. 
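The methods below consume <sourcefiles> tags with <include>, <includesfile>, <exclude>, <excludesfile>, <withoutfile> and <append> children; a self-contained sketch of that tag structure, parsed with ElementTree as in this module (the snippet itself is made up):

```python
import xml.etree.ElementTree as ET

snippet = """
<sourcefiles name="examples">
  <include>tests/*.c</include>
  <exclude>tests/broken.c</exclude>
</sourcefiles>
"""
tag = ET.fromstring(snippet)
assert tag.get("name") == "examples"
assert [e.text for e in tag.findall("include")] == ["tests/*.c"]
assert [e.text for e in tag.findall("exclude")] == ["tests/broken.c"]
```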
+ ''' + # runs are structured as sourcefile sets, one set represents one sourcefiles tag + blocks = [] + + for index, sourcefilesTag in enumerate(sourcefilesTagList): + sourcefileSetName = sourcefilesTag.get("name") + matchName = sourcefileSetName or str(index) + if self.benchmark.config.selected_sourcefile_sets \ + and matchName not in self.benchmark.config.selected_sourcefile_sets: + continue + + required_files = set() + for required_files_tag in sourcefilesTag.findall('requiredfiles'): + thisRequiredFiles = util.expand_filename_pattern(required_files_tag.text, self.benchmark.base_dir) + if not thisRequiredFiles: + logging.warning('Pattern {0} in requiredfiles tag did not match any file.'.format(required_files_tag.text)) + required_files = required_files.union(thisRequiredFiles) + + # get lists of filenames + sourcefiles = self.get_sourcefiles_from_xml(sourcefilesTag, self.benchmark.base_dir) + + # get file-specific options for filenames + fileOptions = util.get_list_from_xml(sourcefilesTag) + property_files = util.get_list_from_xml(sourcefilesTag, tag=PROPERTY_TAG, attributes=[]) + + currentRuns = [] + for sourcefile in sourcefiles: + currentRuns.append(Run(sourcefile, fileOptions, self, property_files, + list(globalRequiredFiles.union(required_files)))) + + blocks.append(SourcefileSet(sourcefileSetName, index, currentRuns)) + return blocks + + + def get_sourcefiles_from_xml(self, sourcefilesTag, base_dir): + sourcefiles = [] + + # get included sourcefiles + for includedFiles in sourcefilesTag.findall("include"): + sourcefiles += self.expand_filename_pattern(includedFiles.text, base_dir) + + # get sourcefiles from list in file + for includesFilesFile in sourcefilesTag.findall("includesfile"): + + for file in self.expand_filename_pattern(includesFilesFile.text, base_dir): + + # check for code (if somebody confuses 'include' and 'includesfile') + if util.is_code(file): + logging.error("'" + file + "' seems to contain code instead of a set of source file names.\n" + \ + "Please check your benchmark definition file or remove bracket '{' from this file.") + sys.exit() + + # read files from list + fileWithList = open(file, 'rt') + for line in fileWithList: + + # strip() removes 'newline' behind the line + line = line.strip() + + # ignore comments and empty lines + if not util.is_comment(line): + sourcefiles += self.expand_filename_pattern(line, os.path.dirname(file)) + + fileWithList.close() + + # remove excluded sourcefiles + for excludedFiles in sourcefilesTag.findall("exclude"): + excludedFilesList = self.expand_filename_pattern(excludedFiles.text, base_dir) + for excludedFile in excludedFilesList: + sourcefiles = util.remove_all(sourcefiles, excludedFile) + + for excludesFilesFile in sourcefilesTag.findall("excludesfile"): + for file in self.expand_filename_pattern(excludesFilesFile.text, base_dir): + # read files from list + fileWithList = open(file, 'rt') + for line in fileWithList: + + # strip() removes 'newline' behind the line + line = line.strip() + + # ignore comments and empty lines + if not util.is_comment(line): + excludedFilesList = self.expand_filename_pattern(line, os.path.dirname(file)) + for excludedFile in excludedFilesList: + sourcefiles = util.remove_all(sourcefiles, excludedFile) + + fileWithList.close() + + # add runs for cases without source files + for run in sourcefilesTag.findall("withoutfile"): + sourcefiles.append(run.text) + + # some runs need more than one sourcefile, + # the first sourcefile is a normal 'include'-file, we use its name as identifier for logfile 
and result-category + # all other files are 'append'ed. + sourcefilesLists = [] + appendFileTags = sourcefilesTag.findall("append") + for sourcefile in sourcefiles: + files = [sourcefile] + for appendFile in appendFileTags: + files.extend(self.expand_filename_pattern(appendFile.text, base_dir, sourcefile=sourcefile)) + sourcefilesLists.append(files) + + return sourcefilesLists + + + def expand_filename_pattern(self, pattern, base_dir, sourcefile=None): + """ + The function expand_filename_pattern expands a filename pattern to a sorted list + of filenames. The pattern can contain variables and wildcards. + If base_dir is given and pattern is not absolute, base_dir and pattern are joined. + """ + + # store pattern for fallback + shortFileFallback = pattern + + # replace vars like ${benchmark_path}, + # with converting to list and back, we can use the function 'substitute_vars()' + expandedPattern = substitute_vars([pattern], self, sourcefile) + assert len(expandedPattern) == 1 + expandedPattern = expandedPattern[0] + + if expandedPattern != pattern: + logging.debug("Expanded variables in expression {0} to {1}." + .format(repr(pattern), repr(expandedPattern))) + + fileList = util.expand_filename_pattern(expandedPattern, base_dir) + + # sort alphabetical, + fileList.sort() + + if not fileList: + logging.warning("No files found matching {0}." + .format(repr(pattern))) + + return fileList + + +class SourcefileSet(): + """ + A SourcefileSet contains a list of runs and a name. + """ + def __init__(self, name, index, runs): + self.real_name = name # this name is optional + self.name = name or str(index) # this name is always non-empty + self.runs = runs + + +_logged_missing_property_files = set() + + +class Run(): + """ + A Run contains some sourcefile, some options, propertyfiles and some other stuff, that is needed for the Run. + """ + + def __init__(self, sourcefiles, fileOptions, runSet, property_files=[], required_files=[]): + assert sourcefiles + self.identifier = sourcefiles[0] # used for name of logfile, substitution, result-category + self.sourcefiles = util.get_files(sourcefiles) # expand directories to get their sub-files + logging.debug("Creating Run with identifier '{0}' and files {1}".format(self.identifier, self.sourcefiles)) + self.runSet = runSet + self.specific_options = fileOptions # options that are specific for this run + self.log_file = runSet.log_folder + os.path.basename(self.identifier) + ".log" + self.required_files = required_files + + # lets reduce memory-consumption: if 2 lists are equal, do not use the second one + self.options = runSet.options + fileOptions if fileOptions else runSet.options # all options to be used when executing this run + substitutedOptions = substitute_vars(self.options, runSet, self.identifier) + if substitutedOptions != self.options: self.options = substitutedOptions # for less memory again + + # get propertyfile for Run: if available, use the last one + if property_files: + self.propertyfile = property_files[-1] + elif runSet.property_files: + self.propertyfile = runSet.property_files[-1] + else: + self.propertyfile = None + + # replace run-specific stuff in the propertyfile and add it to the set of required files + if self.propertyfile is None: + if not self.propertyfile in _logged_missing_property_files: + _logged_missing_property_files.add(self.propertyfile) + logging.warning('No propertyfile specified. 
Results for C programs will be handled as UNKNOWN.')
+        else:
+            # we check two cases: direct filename or user-defined substitution, one of them must be a 'file'
+            # TODO: do we need the second case? it is equal to the previously used option "-spec ${sourcefile_path}/ALL.prp"
+            expandedPropertyFiles = util.expand_filename_pattern(self.propertyfile, self.runSet.benchmark.base_dir)
+            substitutedPropertyfiles = substitute_vars([self.propertyfile], runSet, self.identifier)
+            assert len(substitutedPropertyfiles) == 1
+
+            if expandedPropertyFiles:
+                self.propertyfile = expandedPropertyFiles[0] # take only the first one
+            elif substitutedPropertyfiles and os.path.isfile(substitutedPropertyfiles[0]):
+                self.propertyfile = substitutedPropertyfiles[0]
+            else:
+                if self.propertyfile not in _logged_missing_property_files:
+                    _logged_missing_property_files.add(self.propertyfile)
+                    logging.warning('Pattern {0} for sourcefile {1} in propertyfile tag did not match any file. It will be ignored.'
+                                    .format(self.propertyfile, self.identifier))
+                self.propertyfile = None
+
+        self.runSet.benchmark.add_required_file(self.propertyfile)
+
+        # Copy columns so that this run has its own objects
+        # (we need them for storing the results).
+        self.columns = [Column(c.text, c.title, c.number_of_digits) for c in self.runSet.benchmark.columns]
+
+        # here we store the optional result values, e.g. memory usage, energy, host name
+        # keys need to be strings; if the first character is "@", the value is marked as hidden (e.g., debug info)
+        self.values = {}
+
+        # dummy values, for output in case of interrupt
+        self.status = ""
+        self.cputime = None
+        self.walltime = None
+        self.category = result.CATEGORY_UNKNOWN
+
+
+    def cmdline(self):
+        args = self.runSet.benchmark.tool.cmdline(
+            self.runSet.benchmark.executable, self.options, self.sourcefiles,
+            self.propertyfile, self.runSet.benchmark.rlimits)
+        args = [os.path.expandvars(arg) for arg in args]
+        args = [os.path.expanduser(arg) for arg in args]
+        return args
+
+
+    def after_execution(self, returnvalue, forceTimeout=False):
+
+        rlimits = self.runSet.benchmark.rlimits
+        isTimeout = forceTimeout or self._is_timeout()
+
+        # read output
+        try:
+            with open(self.log_file, 'rt') as outputFile:
+                output = outputFile.readlines()
+                # first 6 lines are for logging, rest is output of subprocess, see runexecutor.py for details
+                output = output[6:]
+        except IOError as e:
+            logging.warning("Cannot read log file: " + e.strerror)
+            output = []
+
+        if returnvalue is not None:
+            # calculation: returnvalue == (returncode * 256) + returnsignal
+            # highest bit of returnsignal shows only whether a core file was produced, we clear it
+            returnsignal = returnvalue & 0x7F
+            returncode = returnvalue >> 8
+            logging.debug("My subprocess returned {0}, code {1}, signal {2}.".format(returnvalue, returncode, returnsignal))
+            self.status = self.runSet.benchmark.tool.determine_result(returncode, returnsignal, output, isTimeout)
+            self.category = result.get_result_category(self.identifier, self.status, self.propertyfile)
+            self.runSet.benchmark.tool.add_column_values(output, self.columns)
+
+
+        # Tools sometimes produce a result even after a timeout.
+        # This should not be counted, so we overwrite the result with TIMEOUT
+        # here if this is the case.
+        # However, we don't want to forget more specific results like SEGFAULT,
+        # so we do this only if the result is a "normal" one like TRUE.
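+        # Worked example (hypothetical numbers): with MEMLIMIT = 1000 (MB),
+        # the threshold below is 1000 * 1000 * 1000 * 0.99 = 990000000 bytes.
+        # A run killed by signal 9 with values['memUsage'] = 999500000 therefore
+        # gets 'OUT OF MEMORY' instead of whatever status the tool reported.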
+        if self.status in result.STATUS_LIST and isTimeout:
+            self.status = "TIMEOUT"
+            self.category = result.CATEGORY_ERROR
+        if returnvalue is not None \
+                and returnsignal == 9 \
+                and MEMLIMIT in rlimits \
+                and 'memUsage' in self.values \
+                and self.values['memUsage'] is not None \
+                and int(self.values['memUsage']) >= (rlimits[MEMLIMIT] * _BYTE_FACTOR * _BYTE_FACTOR * 0.99):
+            self.status = 'OUT OF MEMORY'
+            self.category = result.CATEGORY_ERROR
+
+
+    def _is_timeout(self):
+        ''' try to find out whether the tool terminated because of a timeout '''
+        rlimits = self.runSet.benchmark.rlimits
+        if SOFTTIMELIMIT in rlimits:
+            limit = rlimits[SOFTTIMELIMIT]
+        elif TIMELIMIT in rlimits:
+            limit = rlimits[TIMELIMIT]
+        else:
+            limit = float('inf')
+
+        return self.cputime > limit * 0.99
+
+
+class Column:
+    """
+    The class Column contains the text, title, and number_of_digits of a column.
+    """
+
+    def __init__(self, text, title, numOfDigits):
+        self.text = text
+        self.title = title
+        self.number_of_digits = numOfDigits
+        self.value = ""
+
+
+class Requirements:
+    '''
+    This class wraps the values for the requirements.
+    It parses the tags from XML to get those values.
+    If no values are found, at least the limits are used as requirements.
+    If the user gives a cpu_model in the config, it overrides the previous cpu_model.
+    '''
+    def __init__(self, tags, rlimits, config):
+
+        self.cpu_model = None
+        self.memory = None
+        self.cpu_cores = None
+
+        for requireTag in tags:
+
+            cpu_model = requireTag.get('cpuModel', None)
+            if cpu_model is not None:
+                if self.cpu_model is None:
+                    self.cpu_model = cpu_model
+                else:
+                    raise Exception('Double specification of required CPU model.')
+
+            cpu_cores = requireTag.get('cpuCores', None)
+            if cpu_cores is not None:
+                if self.cpu_cores is None:
+                    self.cpu_cores = int(cpu_cores)
+                else:
+                    raise Exception('Double specification of required CPU cores.')
+
+            memory = requireTag.get('memory', None)
+            if memory is not None:
+                if self.memory is None:
+                    self.memory = int(memory)
+                else:
+                    raise Exception('Double specification of required memory.')
+
+        # TODO: check whether we have enough requirements to reach the limits
+        # TODO: is this really enough? we need some overhead!
+        if self.cpu_cores is None:
+            self.cpu_cores = rlimits.get(CORELIMIT, None)
+
+        if self.memory is None:
+            self.memory = rlimits.get(MEMLIMIT, None)
+
+        if hasattr(config, 'cpu_model') and config.cpu_model is not None:
+            # user-given model -> override value
+            self.cpu_model = config.cpu_model
+
+        if self.cpu_cores is not None and self.cpu_cores <= 0:
+            raise Exception('Invalid value {} for required CPU cores.'.format(self.cpu_cores))
+
+        if self.memory is not None and self.memory <= 0:
+            raise Exception('Invalid value {} for required memory.'.format(self.memory))
+
+
+    def __str__(self):
+        s = ""
+        if self.cpu_model:
+            s += " CPU='" + self.cpu_model + "'"
+        if self.cpu_cores:
+            s += " Cores=" + str(self.cpu_cores)
+        if self.memory:
+            s += " Memory=" + str(self.memory) + "MB"
+
+        return "Requirements:" + (s if s else " None")
+
+
diff --git a/benchexec/oomhandler.py b/benchexec/oomhandler.py
new file mode 100644
index 000000000..4ac3c0e37
--- /dev/null
+++ b/benchexec/oomhandler.py
@@ -0,0 +1,133 @@
+"""
+CPAchecker is a tool for configurable software verification.
+This file is part of CPAchecker.
+
+Copyright (C) 2007-2014 Dirk Beyer
+All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+
+CPAchecker web page:
+  http://cpachecker.sosy-lab.org
+"""
+
+# prepare for Python 3
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+import logging
+import os
+import signal
+import threading
+
+from . import util
+
+_BYTE_FACTOR = 1000 # bytes in a kilobyte
+
+class KillProcessOnOomThread(threading.Thread):
+    """
+    Thread that kills the process when it runs out of memory.
+    Usually the kernel would do this by itself,
+    but sometimes the process still hangs because it does not even have
+    enough memory left to get killed
+    (the memory limit also affects some kernel-internal memory related to our process).
+    So we disable the kernel-side killing,
+    and instead let the kernel notify us via an event when the cgroup runs out of memory.
+    Then we kill the process ourselves and increase the memory limit a little bit.
+
+    The notification works by opening an "event file descriptor" with eventfd,
+    and telling the kernel to notify us about OOMs by writing the event file
+    descriptor and a file descriptor of the memory.oom_control file
+    to cgroup.event_control.
+    The kernel-side process killing is disabled by writing 1 to memory.oom_control.
+    Sources:
+    https://www.kernel.org/doc/Documentation/cgroups/memory.txt
+    https://access.redhat.com/site/documentation//en-US/Red_Hat_Enterprise_Linux/6/html/Resource_Management_Guide/sec-memory.html#ex-OOM-control-notifications
+
+    @param cgroup: The memory cgroup the process is in
+    @param process: The process instance to kill
+    @param callbackFn: A one-argument function that is called in case of OOM with a string for the reason as argument
+    """
+    def __init__(self, cgroup, process, callbackFn=lambda reason: None):
+        super(KillProcessOnOomThread, self).__init__()
+        self.daemon = True
+        self._finished = threading.Event()
+        self._process = process
+        self._cgroup = cgroup
+        self._callback = callbackFn
+
+        ofd = os.open(os.path.join(cgroup, 'memory.oom_control'), os.O_WRONLY)
+        try:
+            from ctypes import cdll
+            libc = cdll.LoadLibrary('libc.so.6')
+
+            # Important to use CLOEXEC, otherwise the benchmarked tool inherits
+            # the file descriptor.
+            EFD_CLOEXEC = 0x80000 # from <sys/eventfd.h>
+            self._efd = libc.eventfd(0, EFD_CLOEXEC)
+
+            try:
+                util.write_file('{} {}'.format(self._efd, ofd),
+                                cgroup, 'cgroup.event_control')
+
+                # If everything worked, disable kernel-side process killing.
+                # This is not allowed if memory.use_hierarchy is enabled,
+                # but we don't care.
+                try:
+                    os.write(ofd, '1'.encode('ascii'))
+                except OSError as e:
+                    logging.debug("Failed to disable kernel-side OOM killer: error {0} ({1})".format(e.errno, e.strerror))
+            except EnvironmentError as e:
+                os.close(self._efd)
+                raise e
+        finally:
+            os.close(ofd)
+
+    def run(self):
+        try:
+            # In an eventfd, there are always 8 bytes
+            eventNumber = os.read(self._efd, 8) # blocks
+            # If read returned, this means the kernel sent us an event.
+            # It does so either on OOM or if the cgroup is removed.
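+            # Sketch of the counter semantics (not needed below, since any
+            # successful read already signals the event): the 8 bytes are a
+            # native-endian 64-bit event counter, e.g.
+            #   import struct
+            #   count = struct.unpack('=Q', eventNumber)[0]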
+ if not self._finished.is_set(): + self._callback('memory') + logging.debug('Killing process {0} due to out-of-memory event from kernel.'.format(self._process.pid)) + util.kill_process(self._process.pid) + # Also kill all children of subprocesses directly. + with open(os.path.join(self._cgroup, 'tasks'), 'rt') as tasks: + for task in tasks: + util.kill_process(int(task)) + + # We now need to increase the memory limit of this cgroup + # to give the process a chance to terminate + self._reset_memory_limit('memory.memsw.limit_in_bytes') + self._reset_memory_limit('memory.limit_in_bytes') + + finally: + try: + os.close(self._efd) + except AttributeError: + pass # when the Python process is shutting down, "os" is sometimes already missing + + def _reset_memory_limit(self, limitFile): + if os.path.exists(os.path.join(self._cgroup, limitFile)): + try: + # Write a high value (1 PB) as the limit + util.write_file(str(1 * _BYTE_FACTOR * _BYTE_FACTOR * _BYTE_FACTOR * _BYTE_FACTOR * _BYTE_FACTOR), + self._cgroup, limitFile) + except IOError as e: + logging.warning('Failed to increase {0} after OOM: error {1} ({2})'.format(limitFile, e.errno, e.strerror)) + + + def cancel(self): + self._finished.set() diff --git a/benchexec/outputhandler.py b/benchexec/outputhandler.py new file mode 100644 index 000000000..44225fb40 --- /dev/null +++ b/benchexec/outputhandler.py @@ -0,0 +1,616 @@ +""" +CPAchecker is a tool for configurable software verification. +This file is part of CPAchecker. + +Copyright (C) 2007-2014 Dirk Beyer +All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + + +CPAchecker web page: + http://cpachecker.sosy-lab.org +""" + +# prepare for Python 3 +from __future__ import absolute_import, division, print_function, unicode_literals + +import threading +import time +import sys +import os +import xml.etree.ElementTree as ET + +from .model import MEMLIMIT, TIMELIMIT, SOFTTIMELIMIT, CORELIMIT +from . import filewriter +from . import result +from . import util as util + +# colors for column status in terminal +USE_COLORS = True +COLOR_GREEN = "\033[32;1m{0}\033[m" +COLOR_RED = "\033[31;1m{0}\033[m" +COLOR_ORANGE = "\033[33;1m{0}\033[m" +COLOR_MAGENTA = "\033[35;1m{0}\033[m" +COLOR_DEFAULT = "{0}" +UNDERLINE = "\033[4m{0}\033[0m" + +COLOR_DIC = {result.CATEGORY_CORRECT: COLOR_GREEN, + result.CATEGORY_WRONG: COLOR_RED, + result.CATEGORY_UNKNOWN: COLOR_ORANGE, + result.CATEGORY_ERROR: COLOR_MAGENTA, + result.CATEGORY_MISSING: COLOR_DEFAULT, + None: COLOR_DEFAULT} + +LEN_OF_STATUS = 22 + +TERMINAL_TITLE='' +_term = os.environ.get('TERM', '') +if _term.startswith(('xterm', 'rxvt')): + TERMINAL_TITLE = "\033]0;Benchmark {0}\007" +elif _term.startswith('screen'): + TERMINAL_TITLE = "\033kBenchmark {0}\033\\" + +# the number of digits after the decimal separator for text output of time columns with times +TIME_PRECISION = 2 + + +class OutputHandler: + """ + The class OutputHandler manages all outputs to the terminal and to files. 
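+    The intended call order, as suggested by the methods below, is roughly:
+    output_before_run_set(), then output_before_run()/output_after_run()
+    for each run, then output_after_run_set(), and finally
+    output_after_benchmark().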
+ """ + + print_lock = threading.Lock() + + def __init__(self, benchmark, sysinfo): + """ + The constructor of OutputHandler collects information about the benchmark and the computer. + """ + + self.all_created_files = [] + self.benchmark = benchmark + self.statistics = Statistics() + self.runSet = None + + version = self.benchmark.tool_version + + memlimit = None + timelimit = None + corelimit = None + if MEMLIMIT in self.benchmark.rlimits: + memlimit = str(self.benchmark.rlimits[MEMLIMIT]) + " MB" + if SOFTTIMELIMIT in self.benchmark.rlimits: + timelimit = str(self.benchmark.rlimits[SOFTTIMELIMIT]) + " s" + elif TIMELIMIT in self.benchmark.rlimits: + timelimit = str(self.benchmark.rlimits[TIMELIMIT]) + " s" + if CORELIMIT in self.benchmark.rlimits: + corelimit = str(self.benchmark.rlimits[CORELIMIT]) + + # create folder for file-specific log-files. + os.makedirs(benchmark.log_folder) + + self.store_header_in_xml(version, memlimit, timelimit, corelimit) + self.write_header_to_log(version, memlimit, timelimit, corelimit, sysinfo) + + if sysinfo: + # store systemInfo in XML + self.store_system_info(sysinfo.os, sysinfo.cpu_model, + sysinfo.cpu_number_of_cores, sysinfo.cpu_max_frequency, + sysinfo.memory, sysinfo.hostname) + self.xml_file_names = [] + + + def store_system_info(self, opSystem, cpu_model, cpu_number_of_cores, cpu_max_frequency, memory, hostname): + for systemInfo in self.xml_header.findall("systeminfo"): + if systemInfo.attrib["hostname"] == hostname: + return + + osElem = ET.Element("os", {"name":opSystem}) + cpuElem = ET.Element("cpu", {"model":cpu_model, "cores":cpu_number_of_cores, "frequency":cpu_max_frequency}) + ramElem = ET.Element("ram", {"size":memory}) + systemInfo = ET.Element("systeminfo", {"hostname":hostname}) + systemInfo.append(osElem) + systemInfo.append(cpuElem) + systemInfo.append(ramElem) + + self.xml_header.append(systemInfo) + if self.runSet and self.runSet.xml: + self.runSet.xml.append(systemInfo) + + + def set_error(self, msg): + """ + Mark the benchmark as erroneous, e.g., because the benchmarking tool crashed. + The message is intended as explanation for the user. + """ + self.xml_header.set('error', msg if msg else 'unknown error') + if self.runSet: + self.runSet.xml.set('error', msg if msg else 'unknown error') + + + def store_header_in_xml(self, version, memlimit, timelimit, corelimit): + + # store benchmarkInfo in XML + self.xml_header = ET.Element("result", + {"benchmarkname": self.benchmark.name, + "date": time.strftime("%y-%m-%d %H:%M", self.benchmark.start_time), + "tool": self.benchmark.tool_name, "version": version}) + + self.xml_header.set(MEMLIMIT, memlimit if memlimit else '-') + self.xml_header.set(TIMELIMIT, timelimit if timelimit else '-') + self.xml_header.set(CORELIMIT, corelimit if corelimit else '-') + + # store columnTitles in XML, this are the default columns, that are shown in a default html-table from table-generator + columntitlesElem = ET.Element("columns") + columntitlesElem.append(ET.Element("column", {"title": "status"})) + columntitlesElem.append(ET.Element("column", {"title": "cputime"})) + columntitlesElem.append(ET.Element("column", {"title": "walltime"})) + for column in self.benchmark.columns: + columnElem = ET.Element("column", {"title": column.title}) + columntitlesElem.append(columnElem) + self.xml_header.append(columntitlesElem) + + # Build dummy entries for output, later replaced by the results, + # The dummy XML elements are shared over all runs. 
+        self.xml_dummy_elements = [ET.Element("column", {"title": "status", "value": ""}),
+                                   ET.Element("column", {"title": "cputime", "value": ""}),
+                                   ET.Element("column", {"title": "walltime", "value": ""})]
+        for column in self.benchmark.columns:
+            self.xml_dummy_elements.append(ET.Element("column",
+                                                      {"title": column.title, "value": ""}))
+
+
+    def write_header_to_log(self, version, memlimit, timelimit, corelimit, sysinfo):
+        """
+        This method writes information about the benchmark and the system into the txt_file.
+        """
+
+        columnWidth = 20
+        simpleLine = "-" * (60) + "\n\n"
+
+        header = " BENCHMARK INFORMATION\n"\
+                + "benchmark:".ljust(columnWidth) + self.benchmark.name + "\n"\
+                + "date:".ljust(columnWidth) + time.strftime("%a, %Y-%m-%d %H:%M:%S %Z", self.benchmark.start_time) + "\n"\
+                + "tool:".ljust(columnWidth) + self.benchmark.tool_name\
+                + " " + version + "\n"
+
+        if memlimit:
+            header += "memlimit:".ljust(columnWidth) + memlimit + "\n"
+        if timelimit:
+            header += "timelimit:".ljust(columnWidth) + timelimit + "\n"
+        if corelimit:
+            header += "CPU cores used:".ljust(columnWidth) + corelimit + "\n"
+        header += simpleLine
+
+        if sysinfo:
+            header += " SYSTEM INFORMATION\n"\
+                    + "host:".ljust(columnWidth) + sysinfo.hostname + "\n"\
+                    + "os:".ljust(columnWidth) + sysinfo.os + "\n"\
+                    + "cpu:".ljust(columnWidth) + sysinfo.cpu_model + "\n"\
+                    + "- cores:".ljust(columnWidth) + sysinfo.cpu_number_of_cores + "\n"\
+                    + "- max frequency:".ljust(columnWidth) + sysinfo.cpu_max_frequency + "\n"\
+                    + "ram:".ljust(columnWidth) + sysinfo.memory + "\n"\
+                    + simpleLine
+
+        self.description = header
+
+        runSetName = None
+        run_sets = [runSet for runSet in self.benchmark.run_sets if runSet.should_be_executed()]
+        if len(run_sets) == 1:
+            # in case there is only a single run set to execute, we can use its name
+            runSetName = run_sets[0].name
+
+        # write to file
+        txt_file_name = self.get_filename(runSetName, "txt")
+        self.txt_file = filewriter.FileWriter(txt_file_name, self.description)
+        self.all_created_files.append(txt_file_name)
+
+
+    def output_before_run_set(self, runSet):
+        """
+        The method output_before_run_set() calculates the length of the
+        first column for the output in terminal and stores information
+        about the runSet in XML.
+ @param runSet: current run set + """ + + self.runSet = runSet + + sourcefiles = [run.identifier for run in runSet.runs] + + # common prefix of file names + self.common_prefix = util.common_base_dir(sourcefiles) + os.path.sep + + # length of the first column in terminal + self.max_length_of_filename = max(len(file) for file in sourcefiles) if sourcefiles else 20 + self.max_length_of_filename = max(20, self.max_length_of_filename - len(self.common_prefix)) + + # write run set name to terminal + numberOfFiles = len(runSet.runs) + numberOfFilesStr = (" (1 file)" if numberOfFiles == 1 + else " ({0} files)".format(numberOfFiles)) + util.printOut("\nexecuting run set" + + (" '" + runSet.name + "'" if runSet.name else "") + + numberOfFilesStr + + (TERMINAL_TITLE.format(runSet.full_name) if USE_COLORS and sys.stdout.isatty() else "")) + + # write information about the run set into txt_file + self.writeRunSetInfoToLog(runSet) + + # prepare information for text output + for run in runSet.runs: + run.resultline = self.format_sourcefile_name(run.identifier) + + # prepare XML structure for each run and runSet + run.xml = ET.Element("sourcefile", + {"name": run.identifier, "files": "[" + ", ".join(run.sourcefiles) + "]"}) + if run.specific_options: + run.xml.set("options", " ".join(run.specific_options)) + run.xml.extend(self.xml_dummy_elements) + + runSet.xml = self.runs_to_xml(runSet, runSet.runs) + + # write (empty) results to txt_file and XML + self.txt_file.append(self.run_set_to_text(runSet), False) + xml_file_name = self.get_filename(runSet.name, "xml") + self.xml_file = filewriter.FileWriter(xml_file_name, + util.xml_to_string(runSet.xml)) + self.xml_file.lastModifiedTime = time.time() + self.all_created_files.append(xml_file_name) + self.xml_file_names.append(xml_file_name) + + + def output_for_skipping_run_set(self, runSet, reason=None): + ''' + This function writes a simple message to terminal and logfile, + when a run set is skipped. + There is no message about skipping a run set in the xml-file. + ''' + + # print to terminal + util.printOut("\nSkipping run set" + + (" '" + runSet.name + "'" if runSet.name else "") + + (" " + reason if reason else "") + ) + + # write into txt_file + runSetInfo = "\n\n" + if runSet.name: + runSetInfo += runSet.name + "\n" + runSetInfo += "Run set {0} of {1}: skipped {2}\n".format( + runSet.index, len(self.benchmark.run_sets), reason or "") + self.txt_file.append(runSetInfo) + + + def writeRunSetInfoToLog(self, runSet): + """ + This method writes the information about a run set into the txt_file. + """ + + runSetInfo = "\n\n" + if runSet.name: + runSetInfo += runSet.name + "\n" + runSetInfo += "Run set {0} of {1} with options '{2}' and propertyfile '{3}'\n\n".format( + runSet.index, len(self.benchmark.run_sets), + " ".join(runSet.options), + " ".join(runSet.property_files)) + + titleLine = self.create_output_line("sourcefile", "status", "cpu time", + "wall time", "host", self.benchmark.columns, True) + + runSet.simpleLine = "-" * (len(titleLine)) + + runSetInfo += titleLine + "\n" + runSet.simpleLine + "\n" + + # write into txt_file + self.txt_file.append(runSetInfo) + + + def output_before_run(self, run): + """ + The method output_before_run() prints the name of a file to terminal. + It returns the name of the logfile. 
+ @param run: a Run object + """ + # output in terminal + try: + OutputHandler.print_lock.acquire() + + timeStr = time.strftime("%H:%M:%S", time.localtime()) + " " + progressIndicator = " ({0}/{1})".format(self.runSet.runs.index(run), len(self.runSet.runs)) + terminalTitle = TERMINAL_TITLE.format(self.runSet.full_name + progressIndicator) if USE_COLORS and sys.stdout.isatty() else "" + if self.benchmark.num_of_threads == 1: + util.printOut(terminalTitle + timeStr + self.format_sourcefile_name(run.identifier), '') + else: + util.printOut(terminalTitle + timeStr + "starting " + self.format_sourcefile_name(run.identifier)) + finally: + OutputHandler.print_lock.release() + + # get name of file-specific log-file + self.all_created_files.append(run.log_file) + + + def output_after_run(self, run): + """ + The method output_after_run() prints filename, result, time and status + of a run to terminal and stores all data in XML + """ + + # format times, type is changed from float to string! + cputime_str = util.format_number(run.cputime, TIME_PRECISION) + walltime_str = util.format_number(run.walltime, TIME_PRECISION) + + # format numbers, number_of_digits is optional, so it can be None + for column in run.columns: + if column.number_of_digits is not None: + + # if the number ends with "s" or another letter, remove it + if (not column.value.isdigit()) and column.value[-2:-1].isdigit(): + column.value = column.value[:-1] + + try: + floatValue = float(column.value) + column.value = util.format_number(floatValue, column.number_of_digits) + except ValueError: # if value is no float, don't format it + pass + + # store information in run + run.resultline = self.create_output_line(run.identifier, run.status, + cputime_str, walltime_str, run.values.get('host'), + run.columns) + self.add_values_to_run_xml(run) + + # output in terminal/console + if USE_COLORS and sys.stdout.isatty(): # is terminal, not file + statusStr = COLOR_DIC[run.category].format(run.status.ljust(LEN_OF_STATUS)) + else: + statusStr = run.status.ljust(LEN_OF_STATUS) + + try: + OutputHandler.print_lock.acquire() + + valueStr = statusStr + cputime_str.rjust(8) + walltime_str.rjust(8) + if self.benchmark.num_of_threads == 1: + util.printOut(valueStr) + else: + timeStr = time.strftime("%H:%M:%S", time.localtime()) + " "*14 + util.printOut(timeStr + self.format_sourcefile_name(run.identifier) + valueStr) + + # write result in txt_file and XML + self.txt_file.append(self.run_set_to_text(run.runSet), False) + self.statistics.add_result(run.category, run.status) + + # we don't want to write this file to often, it can slow down the whole script, + # so we wait at least 10 seconds between two write-actions + currentTime = time.time() + if currentTime - self.xml_file.lastModifiedTime > 10: + self.xml_file.replace(util.xml_to_string(run.runSet.xml)) + self.xml_file.lastModifiedTime = currentTime + + finally: + OutputHandler.print_lock.release() + + + def output_after_run_set(self, runSet, cputime=None, walltime=None, energy={}): + """ + The method output_after_run_set() stores the times of a run set in XML. 
+ @params cputime, walltime: accumulated times of the run set + """ + + self.add_values_to_run_set_xml(runSet, cputime, walltime, energy) + + # write results to files + self.xml_file.replace(util.xml_to_string(runSet.xml)) + + if len(runSet.blocks) > 1: + for block in runSet.blocks: + blockFileName = self.get_filename(runSet.name, block.name + ".xml") + util.write_file( + util.xml_to_string(self.runs_to_xml(runSet, block.runs, block.name)), + blockFileName + ) + self.all_created_files.append(blockFileName) + + self.txt_file.append(self.run_set_to_text(runSet, True, cputime, walltime, energy)) + + + def run_set_to_text(self, runSet, finished=False, cputime=0, walltime=0, energy={}): + lines = [] + + # store values of each run + for run in runSet.runs: lines.append(run.resultline) + + lines.append(runSet.simpleLine) + + # write endline into txt_file + if finished: + endline = ("Run set {0}".format(runSet.index)) + + # format time, type is changed from float to string! + cputime_str = "None" if cputime is None else util.format_number(cputime, TIME_PRECISION) + walltime_str = "None" if walltime is None else util.format_number(walltime, TIME_PRECISION) + lines.append(self.create_output_line(endline, "done", cputime_str, + walltime_str, "-", [])) + + return "\n".join(lines) + "\n" + + def runs_to_xml(self, runSet, runs, blockname=None): + """ + This function creates the XML structure for a list of runs + """ + # copy benchmarkinfo, limits, columntitles, systeminfo from xml_header + runsElem = util.copy_of_xml_element(self.xml_header) + runsElem.set("options", " ".join(runSet.options)) + runsElem.set("propertyfiles", " ".join(runSet.property_files)) + if blockname is not None: + runsElem.set("block", blockname) + runsElem.set("name", ((runSet.real_name + ".") if runSet.real_name else "") + blockname) + elif runSet.real_name: + runsElem.set("name", runSet.real_name) + + # collect XMLelements from all runs + for run in runs: runsElem.append(run.xml) + + return runsElem + + + def add_values_to_run_xml(self, run): + """ + This function adds the result values to the XML representation of a run. + """ + runElem = run.xml + for elem in list(runElem): + runElem.remove(elem) + self.add_column_to_xml(runElem, 'status', run.status) + if run.cputime is not None: + self.add_column_to_xml(runElem, 'cputime', str(run.cputime) + 's') + if run.walltime is not None: + self.add_column_to_xml(runElem, 'walltime', str(run.walltime) + 's') + self.add_column_to_xml(runElem, '@category', run.category) # hidden + self.add_column_to_xml(runElem, '', run.values) + + for column in run.columns: + self.add_column_to_xml(runElem, column.title, column.value) + + + def add_values_to_run_set_xml(self, runSet, cputime, walltime, energy): + """ + This function adds the result values to the XML representation of a runSet. 
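+        Example (illustrative values): a dict like energy={'cpu': 3.5} is
+        flattened by add_column_to_xml() into
+        <column title="energy_cpu" value="3.5"/>.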
+ """ + self.add_column_to_xml(runSet.xml, 'cputime', cputime) + self.add_column_to_xml(runSet.xml, 'walltime', walltime) + self.add_column_to_xml(runSet.xml, 'energy', energy) + + + def add_column_to_xml(self, xml, title, value, prefix=""): + if value is None: + return + + if isinstance(value, dict): + for key, value in value.items(): + if prefix: + common_prefix = prefix + '_' + title + else: + common_prefix = title + self.add_column_to_xml(xml, key, value, prefix=common_prefix) + return + + # default case: add columns + if prefix: + if prefix.startswith('@'): + attributes = {"title": prefix[1:] + '_' + title, "value": str(value), "hidden": "true"} + else: + attributes = {"title": prefix + '_' + title, "value": str(value)} + else: + if title.startswith('@'): + attributes = {"title": title[1:], "value": str(value), "hidden": "true"} + else: + attributes = {"title": title, "value": str(value)} + xml.append(ET.Element("column", attributes)) + + + def create_output_line(self, sourcefile, status, cputime_delta, walltime_delta, host, columns, isFirstLine=False): + """ + @param sourcefile: title of a sourcefile + @param status: status of programm + @param cputime_delta: time from running the programm + @param walltime_delta: time from running the programm + @param columns: list of columns with a title or a value + @param isFirstLine: boolean for different output of headline and other lines + @return: a line for the outputFile + """ + + lengthOfTime = 12 + lengthOfEnergy = 18 + minLengthOfColumns = 8 + + outputLine = self.format_sourcefile_name(sourcefile) + \ + status.ljust(LEN_OF_STATUS) + \ + cputime_delta.rjust(lengthOfTime) + \ + walltime_delta.rjust(lengthOfTime) + \ + str(host).rjust(lengthOfTime) + + for column in columns: + columnLength = max(minLengthOfColumns, len(column.title)) + 2 + + if isFirstLine: + value = column.title + else: + value = column.value + + outputLine = outputLine + str(value).rjust(columnLength) + + return outputLine + + + def output_after_benchmark(self, isStoppedByInterrupt): + self.statistics.print_to_terminal() + + if self.xml_file_names: + tableGeneratorName = 'table-generator.py' + tableGeneratorPath = os.path.relpath(os.path.join(os.path.dirname(__file__), os.path.pardir, tableGeneratorName)) + if tableGeneratorPath == tableGeneratorName: + tableGeneratorPath = './' + tableGeneratorName + util.printOut("In order to get HTML and CSV tables, run\n{0} '{1}'" + .format(tableGeneratorPath, "' '".join(self.xml_file_names))) + + if isStoppedByInterrupt: + util.printOut("\nScript was interrupted by user, some runs may not be done.\n") + + + def get_filename(self, runSetName, fileExtension): + ''' + This function returns the name of the file for a run set + with an extension ("txt", "xml"). + ''' + + fileName = self.benchmark.output_base_name + ".results." + + if runSetName: + fileName += runSetName + "." + + return fileName + fileExtension + + + def format_sourcefile_name(self, fileName): + ''' + Formats the file name of a program for printing on console. 
+ ''' + fileName = fileName.replace(self.common_prefix, '', 1) + return fileName.ljust(self.max_length_of_filename + 4) + + +class Statistics: + + def __init__(self): + self.dic = dict((category,0) for category in COLOR_DIC) + self.dic[(result.CATEGORY_WRONG, result.STATUS_TRUE_PROP)] = 0 + self.counter = 0 + + def add_result(self, category, status): + self.counter += 1 + assert category in self.dic + if category == result.CATEGORY_WRONG and status == result.STATUS_TRUE_PROP: + self.dic[(result.CATEGORY_WRONG, result.STATUS_TRUE_PROP)] += 1 + self.dic[category] += 1 + + + def print_to_terminal(self): + util.printOut('\n'.join(['\nStatistics:' + str(self.counter).rjust(13) + ' Files', + ' correct: ' + str(self.dic[result.CATEGORY_CORRECT]).rjust(4), + ' unknown: ' + str(self.dic[result.CATEGORY_UNKNOWN] + self.dic[result.CATEGORY_ERROR]).rjust(4), + ' false positives:' + str(self.dic[result.CATEGORY_WRONG] - self.dic[(result.CATEGORY_WRONG, result.STATUS_TRUE_PROP)]).rjust(4) + \ + ' (result is false, file is true or has a different false-property)', + ' false negatives:' + str(self.dic[(result.CATEGORY_WRONG, result.STATUS_TRUE_PROP)]).rjust(4) + \ + ' (result is true, file is false)', + ''])) diff --git a/benchexec/result.py b/benchexec/result.py new file mode 100644 index 000000000..61c0960e9 --- /dev/null +++ b/benchexec/result.py @@ -0,0 +1,200 @@ +""" +CPAchecker is a tool for configurable software verification. +This file is part of CPAchecker. + +Copyright (C) 2007-2014 Dirk Beyer +All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + + +CPAchecker web page: + http://cpachecker.sosy-lab.org +""" + +# prepare for Python 3 +from __future__ import absolute_import, division, print_function, unicode_literals +from . import util +import os + + +# CONSTANTS + +CATEGORY_CORRECT = 'correct' +CATEGORY_WRONG = 'wrong' +CATEGORY_UNKNOWN = 'unknown' +CATEGORY_ERROR = 'error' +CATEGORY_MISSING = 'missing' + +STR_TRUE = 'true' +STATUS_UNKNOWN = 'unknown' +STR_FALSE = 'false' # only for special cases. 
STR_FALSE is no official result, because property is missing + +PROP_REACH = 'reach' +PROP_TERMINATION = 'termination' +PROP_DEREF = 'valid-deref' +PROP_FREE = 'valid-free' +PROP_MEMTRACK = 'valid-memtrack' + +# next lines are for public usage in tool-wrappers or in the scripts +STATUS_TRUE_PROP = STR_TRUE +STATUS_FALSE_REACH = STR_FALSE + '(' + PROP_REACH + ')' +STATUS_FALSE_TERMINATION = STR_FALSE + '(' + PROP_TERMINATION + ')' +STATUS_FALSE_DEREF = STR_FALSE + '(' + PROP_DEREF + ')' +STATUS_FALSE_FREE = STR_FALSE + '(' + PROP_FREE + ')' +STATUS_FALSE_MEMTRACK = STR_FALSE + '(' + PROP_MEMTRACK + ')' + + +# list of all public constants, +# if a tool-result is not in this list, it is handled as CATEGORY_ERROR +STATUS_LIST = [STATUS_TRUE_PROP, STATUS_UNKNOWN, + STATUS_FALSE_REACH, STATUS_FALSE_TERMINATION, + STATUS_FALSE_DEREF, STATUS_FALSE_FREE, STATUS_FALSE_MEMTRACK] + +# string used to recognize java programs +JAVA_CHECK_SUBSTRING = '_assert' + +# strings searched in filenames to determine correct or incorrect status. +# use lower case! the dict contains assignments 'substring' --> 'partial statuses' +SUBSTRINGS = { + '_true-unreach-label': (STR_TRUE, [PROP_REACH]), + '_true-unreach-call': (STR_TRUE, [PROP_REACH]), + '_true_assert': (STR_TRUE, [PROP_REACH]), + '_true-termination': (STR_TRUE, [PROP_TERMINATION]), + '_true-valid-deref': (STR_TRUE, [PROP_DEREF]), + '_true-valid-free': (STR_TRUE, [PROP_FREE]), + '_true-valid-memtrack': (STR_TRUE, [PROP_MEMTRACK]), + '_true-valid-memsafety': (STR_TRUE, [PROP_DEREF, PROP_FREE, PROP_MEMTRACK]), + + '_false-unreach-label': (STR_FALSE, [PROP_REACH]), + '_false-unreach-call': (STR_FALSE, [PROP_REACH]), + '_false_assert': (STR_FALSE, [PROP_REACH]), + '_false-termination': (STR_FALSE, [PROP_TERMINATION]), + '_false-valid-deref': (STR_FALSE, [PROP_DEREF]), + '_false-valid-free': (STR_FALSE, [PROP_FREE]), + '_false-valid-memtrack': (STR_FALSE, [PROP_MEMTRACK]) + } + + +# this map contains substring of property-files with their status +PROPERTY_MATCHER = {'LTL(G ! label(': PROP_REACH, + 'LTL(G ! call(__VERIFIER_error()))': PROP_REACH, + 'LTL(F end)': PROP_TERMINATION, + 'LTL(G valid-free)': PROP_FREE, + 'LTL(G valid-deref)': PROP_DEREF, + 'LTL(G valid-memtrack)': PROP_MEMTRACK, + 'OBSERVER AUTOMATON': PROP_REACH + } + + +# Score values taken from http://sv-comp.sosy-lab.org/ +SCORE_CORRECT_TRUE = 2 +SCORE_CORRECT_FALSE = 1 +SCORE_UNKNOWN = 0 +SCORE_WRONG_FALSE = -6 +SCORE_WRONG_TRUE = -12 + + +def _statuses_of_file(filename): + """ + This function returns a list of all properties in the filename. + """ + statuses = [] + for (substr, props) in SUBSTRINGS.items(): + if substr in filename: + statuses.extend((props[0], prop) for prop in props[1]) + return statuses + + +def _statuses_of_property_file(propertyFile): + assert os.path.isfile(propertyFile) + + statuses = [] + with open(propertyFile) as f: + content = f.read() + assert 'CHECK' in content or 'OBSERVER' in content, "Invalid property {0}".format(content) + + # TODO: should we switch to regex or line-based reading? + for substring, status in PROPERTY_MATCHER.items(): + if substring in content: + statuses.append(status) + + assert statuses, "Unkown property {0}".format(content) + return statuses + +def _file_is_java(filename): + return JAVA_CHECK_SUBSTRING in filename + +def get_result_category(filename, status, propertyFile=None): + ''' + This function return a string + that shows the relation between status and file. 
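+    Example (illustrative): for the filename 'test_false-unreach-call.c', a
+    reachability property file, and status 'false(reach)', the searched
+    property is 'false(reach)' and the category is CATEGORY_CORRECT;
+    status 'true' would yield CATEGORY_WRONG instead.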
+ ''' + + if status == STATUS_UNKNOWN: + category = CATEGORY_UNKNOWN + elif status in STATUS_LIST: + + # Currently, no properties for checking Java programs exist, so we only check for PROP_REACH for these + if _file_is_java(filename): + fileStatuses = _statuses_of_file(filename) + + if not fileStatuses: + category = CATEGORY_MISSING + elif all(prop is not PROP_REACH or s in status for (s, prop) in fileStatuses): + category = CATEGORY_CORRECT + else: + category = CATEGORY_WRONG + + # Without propertyfile we do not return correct or wrong results, but always UNKNOWN. + elif propertyFile is None: + category = CATEGORY_MISSING + else: + fileStatuses = _statuses_of_file(filename) + propertiesToCheck = _statuses_of_property_file(propertyFile) + + searchedProperties = [] + for (flag,prop) in fileStatuses: + if prop in propertiesToCheck: + searchedProperties.append(flag + '(' + prop + ')') # format must match with above! + + if not searchedProperties: + # filename gives no hint on the searched bug or + # we are searching for a property, that has nothing to do with the file + category = CATEGORY_MISSING + + elif status is STR_TRUE: + if all(prop.startswith(STR_TRUE) for prop in searchedProperties): + category = CATEGORY_CORRECT + else: + category = CATEGORY_WRONG + + elif status in searchedProperties: + category = CATEGORY_CORRECT + else: + category = CATEGORY_WRONG + + else: + category = CATEGORY_ERROR + return category + + +def calculate_score(category, status): + if category == CATEGORY_CORRECT: + return SCORE_CORRECT_TRUE if status == STATUS_TRUE_PROP else SCORE_CORRECT_FALSE + elif category == CATEGORY_WRONG: + return SCORE_WRONG_TRUE if status == STATUS_TRUE_PROP else SCORE_WRONG_FALSE + elif category in [CATEGORY_UNKNOWN, CATEGORY_ERROR, CATEGORY_MISSING]: + return SCORE_UNKNOWN + else: + assert False, 'impossible category {0}'.format(category) diff --git a/benchexec/runexecutor.py b/benchexec/runexecutor.py new file mode 100644 index 000000000..7d7a6875e --- /dev/null +++ b/benchexec/runexecutor.py @@ -0,0 +1,765 @@ +""" +CPAchecker is a tool for configurable software verification. +This file is part of CPAchecker. + +Copyright (C) 2007-2014 Dirk Beyer +All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + + +CPAchecker web page: + http://cpachecker.sosy-lab.org +""" + +# prepare for Python 3 +from __future__ import absolute_import, division, print_function, unicode_literals + +import argparse +import logging +import multiprocessing +import os +import resource +import signal +import subprocess +import sys +import threading +import time + +from . import util as util +from .cgroups import * +from . import oomhandler + +read_file = util.read_file +write_file = util.write_file + +CPUACCT = 'cpuacct' +CPUSET = 'cpuset' +MEMORY = 'memory' + +_WALLTIME_LIMIT_DEFAULT_OVERHEAD = 30 # seconds more than cputime limit +_BYTE_FACTOR = 1000 # byte in kilobyte + + +def main(argv=None): + """ + A simple command-line interface for the runexecutor module of BenchExec. 
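+    Example invocation when this module is run as a script (arguments as
+    defined below, tool and paths hypothetical):
+      python runexecutor.py --timelimit 10 --memlimit 500000000 \
+          --output tool.log -- ./mytool --verbose input.c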
+ """ + if argv is None: + argv = sys.argv + + # parse options + parser = argparse.ArgumentParser(description= + "Run a command with resource limits and measurements.") + parser.add_argument("args", nargs="+", metavar="ARG", + help='command line to run (prefix with "--" to ensure all arguments are treated correctly)') + parser.add_argument("--output", default="output.log", metavar="FILE", + help="file name for file with command output") + parser.add_argument("--maxOutputSize", type=int, metavar="BYTES", + help="approximate size of command output after which it will be truncated") + parser.add_argument("--memlimit", type=int, metavar="BYTES", + help="memory limit in bytes") + parser.add_argument("--timelimit", type=int, metavar="SECONDS", + help="CPU time limit in seconds") + parser.add_argument("--softtimelimit", type=int, metavar="SECONDS", + help='"soft" CPU time limit in seconds') + parser.add_argument("--walltimelimit", type=int, metavar="SECONDS", + help='wall time limit in seconds (default is CPU time plus a few seconds)') + parser.add_argument("--cores", type=util.parse_int_list, metavar="N,M-K", + help="the list of CPU cores to use") + parser.add_argument("--memoryNodes", type=util.parse_int_list, metavar="N,M-K", + help="the list of memory nodes to use") + parser.add_argument("--dir", metavar="DIR", + help="working directory for executing the command (default is current directory)") + verbosity = parser.add_mutually_exclusive_group() + verbosity.add_argument("--debug", action="store_true", + help="Show debug output") + verbosity.add_argument("--quiet", action="store_true", + help="Show only warnings") + options = parser.parse_args(argv[1:]) + + # For integrating into some benchmarking frameworks, + # there is a DEPRECATED special mode + # where the first and only command-line argument is a serialized dict + # with additional options + env = {} + if len(options.args) == 1 and options.args[0].startswith("{"): + data = eval(options.args[0]) + options.args = data["args"] + env = data.get("env", {}) + options.debug = data.get("debug", options.debug) + if "maxLogfileSize" in data: + options.maxOutputSize = data["maxLogfileSize"] * _BYTE_FACTOR * _BYTE_FACTOR # MB to bytes + + # setup logging + logLevel = logging.INFO + if options.debug: + logLevel = logging.DEBUG + elif options.quiet: + logLevel = logging.WARNING + logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s", + level=logLevel) + + executor = RunExecutor() + + # ensure that process gets killed on interrupt/kill signal + def signal_handler_kill(signum, frame): + executor.stop() + signal.signal(signal.SIGTERM, signal_handler_kill) + signal.signal(signal.SIGINT, signal_handler_kill) + + logging.info('Starting command ' + ' '.join(options.args)) + logging.info('Writing output to ' + options.output) + + # actual run execution + result = \ + executor.execute_run(args=options.args, + output_filename=options.output, + hardtimelimit=options.timelimit, + softtimelimit=options.softtimelimit, + walltimelimit=options.walltimelimit, + cores=options.cores, + memlimit=options.memlimit, + memory_nodes=options.memoryNodes, + environments=env, + workingDir=options.dir, + maxLogfileSize=options.maxOutputSize) + + # exit_code is a special number: + # It is a 16bit int of which the lowest 7 bit are the signal number, + # and the high byte is the real exit code of the process (here 0). 
+    exit_code = result['exitcode']
+    return_value = exit_code // 256
+    exitSignal = exit_code % 128
+
+    def print_optional_result(key):
+        if key in result:
+            # avoid unicode literals such that the string can be parsed by Python 3.2
+            print(key + "=" + str(result[key]).replace("u'", "'"))
+
+    # output results
+    print_optional_result('terminationreason')
+    print("exitcode=" + str(exit_code))
+    if (exitSignal == 0) or (return_value != 0):
+        print("returnvalue=" + str(return_value))
+    if exitSignal != 0:
+        print("exitsignal=" + str(exitSignal))
+    print("walltime=" + str(result['walltime']) + "s")
+    print("cputime=" + str(result['cputime']) + "s")
+    print_optional_result('memory')
+    if 'energy' in result:
+        for key, value in result['energy'].items():
+            print("energy-{0}={1}".format(key, value))
+
+
+class RunExecutor():
+
+    def __init__(self):
+        self.PROCESS_KILLED = False
+        self.SUB_PROCESSES_LOCK = threading.Lock() # needed, because we kill the process asynchronously
+        self.SUB_PROCESSES = set()
+        self._termination_reason = None
+
+        self._init_cgroups()
+
+
+    def _init_cgroups(self):
+        """
+        This function initializes the cgroups for the limitations.
+        Please call it before any calls to execute_run(),
+        if you want to separate initialization from actual run execution
+        (e.g., for better error-message handling).
+        """
+        self.cgroupsParents = {} # contains the roots of all cgroup subsystems
+
+        init_cgroup(self.cgroupsParents, CPUACCT)
+        if not self.cgroupsParents[CPUACCT]:
+            logging.warning('Without cpuacct cgroups, cputime measurement and limit might not work correctly if subprocesses are started.')
+
+        init_cgroup(self.cgroupsParents, MEMORY)
+        if not self.cgroupsParents[MEMORY]:
+            logging.warning('Cannot measure memory consumption without memory cgroup.')
+
+        init_cgroup(self.cgroupsParents, CPUSET)
+
+        self.cpus = None # to indicate that we cannot limit cores
+        self.memory_nodes = None # to indicate that we cannot limit memory nodes
+        cgroupCpuset = self.cgroupsParents[CPUSET]
+        if cgroupCpuset:
+            # Read available cpus/memory nodes:
+            cpuStr = read_file(cgroupCpuset, 'cpuset.cpus')
+            try:
+                self.cpus = util.parse_int_list(cpuStr)
+            except ValueError as e:
+                logging.warning("Could not read available CPU cores from kernel: {0}".format(e))
+            logging.debug("List of available CPU cores is {0}.".format(self.cpus))
+
+            memsStr = read_file(cgroupCpuset, 'cpuset.mems')
+            try:
+                self.memory_nodes = util.parse_int_list(memsStr)
+            except ValueError as e:
+                logging.warning("Could not read available memory nodes from kernel: {0}".format(e))
+            logging.debug("List of available memory nodes is {0}.".format(self.memory_nodes))
+
+
+    def _setup_cgroups(self, args, my_cpus, memlimit, memory_nodes):
+        """
+        This method creates the CGroups for the following execution.
+        @param args: the command line to run, used only for logging
+        @param my_cpus: None or a list of the CPU cores to use
+        @param memlimit: None or memory limit in bytes
+        @param memory_nodes: None or a list of memory nodes of a NUMA system to use
+        @return cgroups: a map of all the necessary cgroups for the following execution.
+                         Please add the process of the following execution to all those cgroups!
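+                         The returned map may look like (illustrative paths)
+                         {CPUACCT: '/sys/fs/cgroup/cpuacct/benchexec_1234',
+                          MEMORY: '/sys/fs/cgroup/memory/benchexec_1234'}.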
+ """ + + # Setup cgroups, need a single call to create_cgroup() for all subsystems + subsystems = [CPUACCT, MEMORY] + if my_cpus is not None: + subsystems.append(CPUSET) + + cgroups = create_cgroup(self.cgroupsParents, *subsystems) + + logging.debug("Executing {0} in cgroups {1}.".format(args, cgroups.values())) + + # Setup cpuset cgroup if necessary to limit the CPU cores/memory nodes to be used. + if my_cpus is not None: + cgroupCpuset = cgroups[CPUSET] + my_cpus_str = ','.join(map(str, my_cpus)) + write_file(my_cpus_str, cgroupCpuset, 'cpuset.cpus') + my_cpus_str = read_file(cgroupCpuset, 'cpuset.cpus') + logging.debug('Executing {0} with cpu cores [{1}].'.format(args, my_cpus_str)) + + if memory_nodes is not None: + cgroupCpuset = cgroups[CPUSET] + write_file(','.join(map(str, memory_nodes)), cgroupCpuset, 'cpuset.mems') + memory_nodesStr = read_file(cgroupCpuset, 'cpuset.mems') + logging.debug('Executing {0} with memory nodes [{1}].'.format(args, memory_nodesStr)) + + + # Setup memory limit + if memlimit is not None: + cgroupMemory = cgroups[MEMORY] + + limitFile = 'memory.limit_in_bytes' + write_file(str(memlimit), cgroupMemory, limitFile) + + swapLimitFile = 'memory.memsw.limit_in_bytes' + # We need swap limit because otherwise the kernel just starts swapping + # out our process if the limit is reached. + # Some kernels might not have this feature, + # which is ok if there is actually no swap. + if not os.path.exists(os.path.join(cgroupMemory, swapLimitFile)): + if _has_swap(): + sys.exit('Kernel misses feature for accounting swap memory, but machine has swap. Please set swapaccount=1 on your kernel command line or disable swap with "sudo swapoff -a".') + else: + try: + write_file(str(memlimit), cgroupMemory, swapLimitFile) + except IOError as e: + if e.errno == 95: # kernel responds with error 95 (operation unsupported) if this is disabled + sys.exit('Memory limit specified, but kernel does not allow limiting swap memory. Please set swapaccount=1 on your kernel command line or disable swap with "sudo swapoff -a".') + raise e + + memlimit = read_file(cgroupMemory, limitFile) + logging.debug('Executing {0} with memory limit {1} bytes.'.format(args, memlimit)) + + if not os.path.exists(os.path.join(cgroups[MEMORY], 'memory.memsw.max_usage_in_bytes')) and _has_swap(): + logging.warning('Kernel misses feature for accounting swap memory, but machine has swap. Memory usage may be measured inaccurately. Please set swapaccount=1 on your kernel command line or disable swap with "sudo swapoff -a".') + + return cgroups + + + def _execute(self, args, output_filename, cgroups, hardtimelimit, softtimelimit, walltimelimit, myCpuCount, memlimit, environments, workingDir): + """ + This method executes the command line and waits for the termination of it. + """ + + def preSubprocess(): + os.setpgrp() # make subprocess to group-leader + os.nice(5) # increase niceness of subprocess + + if hardtimelimit is not None: + # Also use ulimit for CPU time limit as a fallback if cgroups are not available + resource.setrlimit(resource.RLIMIT_CPU, (hardtimelimit, hardtimelimit)) + # TODO: using ulimit allows the tool to be killed because of timelimit + # without the termination reason to be properly set + + # put us into the cgroup(s) + pid = os.getpid() + # On some systems, cgrulesengd would move our process into other cgroups. + # We disable this behavior via libcgroup if available. + # Unfortunately, logging/printing does not seem to work here. 
+ from ctypes import cdll + try: + libcgroup = cdll.LoadLibrary('libcgroup.so.1') + failure = libcgroup.cgroup_init() + if failure: + pass + #print('Could not initialize libcgroup, error {}'.format(success)) + else: + CGROUP_DAEMON_UNCHANGE_CHILDREN = 0x1 + failure = libcgroup.cgroup_register_unchanged_process(pid, CGROUP_DAEMON_UNCHANGE_CHILDREN) + if failure: + pass + #print('Could not register process to cgrulesndg, error {}. Probably the daemon will mess up our cgroups.'.format(success)) + except OSError as e: + pass + #print('libcgroup is not available: {}'.format(e.strerror)) + + for cgroup in set(cgroups.values()): + add_task_to_cgroup(cgroup, pid) + + + # copy parent-environment and set needed values, either override or append + runningEnv = os.environ.copy() + for key, value in environments.get("newEnv", {}).items(): + runningEnv[key] = value + for key, value in environments.get("additionalEnv", {}).items(): + runningEnv[key] = runningEnv.get(key, "") + value + + logging.debug("Using additional environment {0}.".format(str(environments))) + + # write command line into outputFile + outputFile = open(output_filename, 'w') # override existing file + outputFile.write(' '.join(args) + '\n\n\n' + '-' * 80 + '\n\n\n') + outputFile.flush() + + timelimitThread = None + oomThread = None + energyBefore = util.measure_energy() + walltime_before = time.time() + + p = None + try: + p = subprocess.Popen(args, + stdout=outputFile, stderr=outputFile, + env=runningEnv, cwd=workingDir, + preexec_fn=preSubprocess) + + except OSError as e: + logging.critical("OSError {0} while starting {1}: {2}. " + + "Assure that the directory containing the tool to be benchmarked is included " + + "in the PATH environment variable or an alias is set." + .format(e.errno, args[0], e.strerror)) + return (0, 0, 0, None) + + try: + with self.SUB_PROCESSES_LOCK: + self.SUB_PROCESSES.add(p) + + if hardtimelimit is not None and CPUACCT in cgroups: + # Start a timer to periodically check timelimit with cgroup + # if the tool uses subprocesses and ulimit does not work. 
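+                # Presumed mechanism of _TimelimitThread (sketch): it periodically
+                # reads the accumulated CPU time of the whole cgroup, e.g.
+                #   cputime = int(read_file(cgroup, 'cpuacct.usage')) / 1000000000
+                # and kills the process once the hard limit is exceeded.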
+ timelimitThread = _TimelimitThread(cgroups[CPUACCT], hardtimelimit, softtimelimit, walltimelimit, p, myCpuCount, self._set_termination_reason)
+ timelimitThread.start()
+
+ if memlimit is not None:
+ try:
+ oomThread = oomhandler.KillProcessOnOomThread(cgroups[MEMORY], p,
+ self._set_termination_reason)
+ oomThread.start()
+ except OSError as e:
+ logging.critical("OSError {0} during setup of KillProcessOnOomThread: {1}.".format(e.errno, e.strerror))
+
+ try:
+ logging.debug("waiting for: pid:{0}".format(p.pid))
+ pid, returnvalue, ru_child = os.wait4(p.pid, 0)
+ logging.debug("waiting finished: pid:{0}, retVal:{1}".format(pid, returnvalue))
+
+ except OSError as e:
+ returnvalue = 0
+ ru_child = None
+ if self.PROCESS_KILLED:
+ # OSError 4 (EINTR, interrupted system call) always seems to happen
+ # if we killed the process ourselves after Ctrl+C was pressed.
+ logging.debug("OSError {0} while waiting for termination of {1} ({2}): {3}.".format(e.errno, args[0], p.pid, e.strerror))
+ else:
+ logging.critical("OSError {0} while waiting for termination of {1} ({2}): {3}.".format(e.errno, args[0], p.pid, e.strerror))
+
+ finally:
+ walltime_after = time.time()
+
+
+ with self.SUB_PROCESSES_LOCK:
+ self.SUB_PROCESSES.discard(p)
+
+ if timelimitThread:
+ timelimitThread.cancel()
+
+ if oomThread:
+ oomThread.cancel()
+
+ outputFile.close() # normally the subprocess closes the file, but close it again to be safe
+
+ logging.debug("size of logfile '{0}': {1}".format(output_filename, str(os.path.getsize(output_filename))))
+
+ # kill all remaining processes if some managed to survive
+ for cgroup in set(cgroups.values()):
+ kill_all_tasks_in_cgroup(cgroup)
+
+ energy = util.measure_energy(energyBefore)
+ walltime = walltime_after - walltime_before
+ cputime = ru_child.ru_utime + ru_child.ru_stime if ru_child else 0
+ return (returnvalue, walltime, cputime, energy)
+
+
+
+ def _get_exact_measures(self, cgroups, returnvalue, walltime, cputime):
+ """
+ This method tries to extract more precise measurements from the cgroups.
+ """
+
+ cputime2 = None
+ if CPUACCT in cgroups:
+ # We want to read the value from the cgroup.
+ # The documentation warns about outdated values.
+ # So we read twice with 0.1s time difference,
+ # and continue reading as long as the values differ.
+ # This has never happened except when interrupting the script with Ctrl+C,
+ # but we just try to be on the safe side here.
+ cgroupCpuacct = cgroups[CPUACCT]
+ tmp = _read_cputime(cgroupCpuacct)
+ tmp2 = None
+ while tmp != tmp2:
+ time.sleep(0.1)
+ tmp2 = tmp
+ tmp = _read_cputime(cgroupCpuacct)
+ cputime2 = tmp
+
+ memUsage = None
+ if MEMORY in cgroups:
+ # This measurement reads the maximum number of bytes of RAM+Swap the process used.
+ # For more details, c.f. the kernel documentation:
+ # https://www.kernel.org/doc/Documentation/cgroups/memory.txt
+ memUsageFile = 'memory.memsw.max_usage_in_bytes'
+ if not os.path.exists(os.path.join(cgroups[MEMORY], memUsageFile)):
+ memUsageFile = 'memory.max_usage_in_bytes'
+ if not os.path.exists(os.path.join(cgroups[MEMORY], memUsageFile)):
+ logging.warning('Memory-usage is not available due to missing files.')
+ else:
+ try:
+ memUsage = read_file(cgroups[MEMORY], memUsageFile)
+ memUsage = int(memUsage)
+ except IOError as e:
+ if e.errno == 95: # kernel responds with error 95 (operation unsupported) if this is disabled
+ logging.critical("Kernel does not track swap memory usage, cannot measure memory usage. "
+ + "Please set swapaccount=1 on your kernel command line.")
+ else:
+ raise # re-raise with the original traceback
+
+ logging.debug('Run exited with code {0}, walltime={1}, cputime={2}, cgroup-cputime={3}, memory={4}'
+ .format(returnvalue, walltime, cputime, cputime2, memUsage))
+
+ # Usually cputime2 seems to be about 0.01s greater than cputime.
+ # Furthermore, cputime might miss some subprocesses,
+ # therefore we expect cputime2 to always be greater (and more correct).
+ # However, sometimes cputime is a little bit bigger than cputime2.
+ # This may indicate a problem with cgroups, for example another process
+ # moving our benchmarked process between cgroups.
+ if cputime2 is not None:
+ if (cputime * 0.9) > cputime2:
+ logging.warning('Cputime measured by wait was {0}, cputime measured by cgroup was only {1}, perhaps measurement is flawed.'.format(cputime, cputime2))
+ else:
+ cputime = cputime2
+
+ return (cputime, memUsage)
+
+
+ def execute_run(self, args, output_filename,
+ hardtimelimit=None, softtimelimit=None, walltimelimit=None,
+ cores=None, memlimit=None, memory_nodes=None,
+ environments={}, workingDir=None, maxLogfileSize=None):
+ """
+ This function executes a given command with resource limits,
+ and writes the output to a file.
+ @param args: the command line to run
+ @param output_filename: the file where the output should be written to
+ @param hardtimelimit: None or the CPU time in seconds after which the tool is forcefully killed
+ @param softtimelimit: None or the CPU time in seconds after which the tool is sent the TERM signal so that it can terminate gracefully
+ @param walltimelimit: None or the wall time in seconds after which the tool is forcefully killed (default: hardtimelimit + a few seconds)
+ @param cores: None or a list of the CPU cores to use
+ @param memlimit: None or memory limit in bytes
+ @param memory_nodes: None or a list of memory nodes in a NUMA system to use
+ @param environments: special environments for running the command
+ @param workingDir: None or a directory which the execution should use as working directory
+ @param maxLogfileSize: None or a number of bytes to which the output of the tool should be truncated approximately if there is too much output
+ @return: a dict with the keys walltime (seconds), cputime (seconds), and exitcode, and optionally memory (bytes), terminationreason, and energy
+ """
+
+ if hardtimelimit is not None:
+ if hardtimelimit <= 0:
+ sys.exit("Invalid time limit {0}.".format(hardtimelimit))
+ if softtimelimit is not None:
+ if softtimelimit <= 0:
+ sys.exit("Invalid soft time limit {0}.".format(softtimelimit))
+ if hardtimelimit is None:
+ sys.exit("Soft time limit without hard time limit is not implemented.")
+ if softtimelimit > hardtimelimit:
+ sys.exit("Soft time limit cannot be larger than the hard time limit.")
+
+ if walltimelimit is None:
+ if hardtimelimit is not None:
+ walltimelimit = hardtimelimit + _WALLTIME_LIMIT_DEFAULT_OVERHEAD
+ else:
+ if walltimelimit <= 0:
+ sys.exit("Invalid wall time limit {0}.".format(walltimelimit))
+ if hardtimelimit is None:
+ sys.exit("Wall time limit without hard time limit is not implemented.")
+
+ if cores is not None:
+ if self.cpus is None:
+ sys.exit("Cannot limit CPU cores without cpuset cgroup.")
+ coreCount = len(cores)
+ if coreCount == 0:
+ sys.exit("Cannot execute run without any CPU core.")
+ if not set(cores).issubset(self.cpus):
+ sys.exit("Cores {0} are not allowed to be used.".format(list(set(cores).difference(self.cpus))))
+ else:
+ try:
+ coreCount = multiprocessing.cpu_count()
+ except NotImplementedError:
+ coreCount = 1
+
+ if memlimit is not None:
+ if memlimit <= 0:
+ sys.exit("Invalid memory limit {0}.".format(memlimit))
+ if not self.cgroupsParents[MEMORY]:
+ sys.exit("Memory limit specified, but cannot be implemented without cgroup support.")
+
+ if memory_nodes is not None:
+ if self.memory_nodes is None:
+ sys.exit("Cannot restrict memory nodes without cpuset cgroup.")
+ if len(memory_nodes) == 0:
+ sys.exit("Cannot execute run without any memory node.")
+ if not set(memory_nodes).issubset(self.memory_nodes):
+ sys.exit("Memory nodes {0} are not allowed to be used.".format(list(set(memory_nodes).difference(self.memory_nodes))))
+
+ if workingDir:
+ if not os.path.exists(workingDir):
+ sys.exit("Working directory {0} does not exist.".format(workingDir))
+ if not os.path.isdir(workingDir):
+ sys.exit("Working directory {0} is not a directory.".format(workingDir))
+ if not os.access(workingDir, os.X_OK):
+ sys.exit("Permission denied for working directory {0}.".format(workingDir))
+
+ self._termination_reason = None
+
+ logging.debug("execute_run: setting up Cgroups.")
+ cgroups = self._setup_cgroups(args, cores, memlimit, memory_nodes)
+
+ try:
+ logging.debug("execute_run: executing tool.")
+ (exitcode, walltime, cputime, energy) = \
+ self._execute(args, output_filename, cgroups,
+ hardtimelimit, softtimelimit, walltimelimit,
+ coreCount, memlimit,
+ environments, workingDir)
+
+ logging.debug("execute_run: getting exact measures.")
+ (cputime, memUsage) = self._get_exact_measures(cgroups, exitcode, walltime, cputime)
+
+ finally: # always try to clean up the cgroups, even on sys.exit()
+ logging.debug("execute_run: cleaning up CGroups.")
+ for cgroup in set(cgroups.values()):
+ # Need the set here to delete each cgroup only once.
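+ # A cgroup directory can only be removed after all tasks in it have
+ # terminated; _execute() therefore kills all remaining tasks in the
+ # cgroups before returning.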
+ remove_cgroup(cgroup)
+
+ # If an exception was thrown, the rest is skipped; otherwise we continue normally.
+
+ _reduce_file_size_if_necessary(output_filename, maxLogfileSize)
+
+ if exitcode not in [0,1]:
+ logging.debug("execute_run: analysing output for crash-info.")
+ _get_debug_output_after_crash(output_filename)
+
+ logging.debug("execute_run: Run execution returns with code {0}, walltime={1}, cputime={2}, memory={3}, energy={4}"
+ .format(exitcode, walltime, cputime, memUsage, energy))
+
+ result = {'walltime': walltime,
+ 'cputime': cputime,
+ 'exitcode': exitcode,
+ }
+ if memUsage:
+ result['memory'] = memUsage
+ if self._termination_reason:
+ result['terminationreason'] = self._termination_reason
+ if energy:
+ result['energy'] = energy
+ return result
+
+ def _set_termination_reason(self, reason):
+ self._termination_reason = reason
+
+ def stop(self):
+ self._set_termination_reason('killed')
+ self.PROCESS_KILLED = True
+ with self.SUB_PROCESSES_LOCK:
+ for process in self.SUB_PROCESSES:
+ logging.warning('Killing process {0} forcefully.'.format(process.pid))
+ util.kill_process(process.pid)
+
+
+def _reduce_file_size_if_necessary(fileName, maxSize):
+ """
+ This function shrinks a file.
+ We remove only the middle part of the file;
+ the start and the end of the file remain unchanged.
+ """
+ if maxSize is None: return # disabled, nothing to do
+
+ fileSize = os.path.getsize(fileName)
+ if fileSize < (maxSize + 500): return # not necessary
+
+ logging.warning("Logfile '{0}' is too big (size {1} bytes). Removing lines.".format(fileName, fileSize))
+
+ # We partition the file into 3 parts:
+ # A) start: maxSize/2 bytes we want to keep
+ # B) middle: part we want to remove
+ # C) end: maxSize/2 bytes we want to keep
+
+ # Trick taken from StackOverflow:
+ # https://stackoverflow.com/questions/2329417/fastest-way-to-delete-a-line-from-large-file-in-python
+ # We open the file twice at the same time, once for reading and once for writing.
+ # We position the one file object at the beginning of B
+ # and the other at the beginning of C.
+ # Then we copy the content of C into B, overwriting what is there.
+ # Afterwards we truncate the file after A+C.
+
+ with open(fileName, 'r+') as outputFile:
+ with open(fileName, 'r') as inputFile:
+ # Position outputFile between A and B
+ outputFile.seek(maxSize // 2)
+ outputFile.readline() # jump to end of current line so that we truncate at line boundaries
+
+ outputFile.write("\n\n\nWARNING: YOUR LOGFILE WAS TOO LONG, SOME LINES IN THE MIDDLE WERE REMOVED.\n\n\n\n")
+
+ # Position inputFile between B and C
+ inputFile.seek(-maxSize // 2, os.SEEK_END) # jump to beginning of second part we want to keep, counted from the end of the file
+ inputFile.readline() # jump to end of current line so that we truncate at line boundaries
+
+ # Copy C over B
+ _copy_all_lines_from_to(inputFile, outputFile)
+
+ outputFile.truncate()
+
+
+def _copy_all_lines_from_to(inputFile, outputFile):
+ """
+ Copy all lines from an input file object to an output file object.
+ """
+ currentLine = inputFile.readline()
+ while currentLine:
+ outputFile.write(currentLine)
+ currentLine = inputFile.readline()
+
+
+def _get_debug_output_after_crash(output_filename):
+ """
+ Segmentation faults and some memory failures reference a file
+ with more information (hs_err_pid_*). We append this file to the log.
+ The format that we expect is a line
+ "# An error report file with more information is saved as:"
+ and the file name of the dump file on the next line.
+ """ + foundDumpFile = False + with open(output_filename, 'r+') as outputFile: + for line in outputFile: + if foundDumpFile: + try: + dumpFileName = line.strip(' #\n') + outputFile.seek(0, os.SEEK_END) # jump to end of log file + with open(dumpFileName, 'r') as dumpFile: + _copy_all_lines_from_to(dumpFile, outputFile) + os.remove(dumpFileName) + except IOError as e: + logging.warn('Could not append additional segmentation fault information from {0} ({1})'.format(dumpFile, e.strerror)) + break + if unicode(line, errors='ignore').startswith('# An error report file with more information is saved as:'): + logging.debug('Going to append error report file') + foundDumpFile = True + + +def _read_cputime(cgroupCpuacct): + cputimeFile = os.path.join(cgroupCpuacct, 'cpuacct.usage') + if not os.path.exists(cputimeFile): + logging.warning('Could not read cputime. File {0} does not exist.'.format(cputimeFile)) + return 0 # dummy value, if cputime is not available + return float(read_file(cputimeFile))/1000000000 # nano-seconds to seconds + + +class _TimelimitThread(threading.Thread): + """ + Thread that periodically checks whether the given process has already + reached its timelimit. After this happens, the process is terminated. + """ + def __init__(self, cgroupCpuacct, hardtimelimit, softtimelimit, walltimelimit, process, cpuCount=1, + callbackFn=lambda reason: None): + super(_TimelimitThread, self).__init__() + self.daemon = True + self.cgroupCpuacct = cgroupCpuacct + self.timelimit = hardtimelimit + self.softtimelimit = softtimelimit or hardtimelimit + self.latestKillTime = time.time() + walltimelimit + self.cpuCount = cpuCount + self.process = process + self.callback = callbackFn + self.finished = threading.Event() + + def run(self): + while not self.finished.is_set(): + read = False + while not read: + try: + usedCpuTime = _read_cputime(self.cgroupCpuacct) + read = True + except ValueError: + # Sometimes the kernel produces strange values with linebreaks in them + time.sleep(1) + pass + remainingCpuTime = self.timelimit - usedCpuTime + remainingWallTime = self.latestKillTime - time.time() + logging.debug("TimelimitThread for process {0}: used CPU time: {1}, remaining CPU time: {2}, remaining wall time: {3}." 
+ .format(self.process.pid, usedCpuTime, remainingCpuTime, remainingWallTime)) + if remainingCpuTime <= 0: + self.callback('cputime') + logging.debug('Killing process {0} due to CPU time timeout.'.format(self.process.pid)) + util.kill_process(self.process.pid) + self.finished.set() + return + if remainingWallTime <= 0: + self.callback('walltime') + logging.warning('Killing process {0} due to wall time timeout.'.format(self.process.pid)) + util.kill_process(self.process.pid) + self.finished.set() + return + + if (self.softtimelimit - usedCpuTime) <= 0: + self.callback('cputime-soft') + # soft time limit violated, ask process to terminate + util.kill_process(self.process.pid, signal.SIGTERM) + self.softtimelimit = self.timelimit + + remainingTime = min(remainingCpuTime/self.cpuCount, remainingWallTime) + self.finished.wait(remainingTime + 1) + + def cancel(self): + self.finished.set() + + +def _has_swap(): + with open('/proc/meminfo', 'r') as meminfo: + for line in meminfo: + if line.startswith('SwapTotal:'): + swap = line.split()[1] + if int(swap) == 0: + return False + return True diff --git a/benchexec/systeminfo.py b/benchexec/systeminfo.py new file mode 100644 index 000000000..6988f4929 --- /dev/null +++ b/benchexec/systeminfo.py @@ -0,0 +1,80 @@ +""" +CPAchecker is a tool for configurable software verification. +This file is part of CPAchecker. + +Copyright (C) 2007-2014 Dirk Beyer +All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + + +CPAchecker web page: + http://cpachecker.sosy-lab.org +""" + +# prepare for Python 3 +from __future__ import absolute_import, division, print_function, unicode_literals + +import os + +class SystemInfo(object): + def __init__(self): + """ + This function finds some information about the computer. 
+ """ + # get info about OS + (sysname, self.hostname, kernel, version, machine) = os.uname() + self.os = sysname + " " + kernel + " " + machine + + # get info about CPU + cpuInfo = dict() + self.cpu_max_frequency = 'unknown' + cpuInfoFilename = '/proc/cpuinfo' + self.cpu_number_of_cores = 'unknown' + if os.path.isfile(cpuInfoFilename) and os.access(cpuInfoFilename, os.R_OK): + cpuInfoFile = open(cpuInfoFilename, 'rt') + cpuInfoLines = [tuple(line.split(':')) for line in + cpuInfoFile.read() + .replace('\n\n', '\n').replace('\t', '') + .strip('\n').split('\n')] + cpuInfo = dict(cpuInfoLines) + cpuInfoFile.close() + self.cpu_number_of_cores = str(len([line for line in cpuInfoLines if line[0] == 'processor'])) + self.cpu_model = cpuInfo.get('model name', 'unknown') \ + .strip() \ + .replace("(R)", "") \ + .replace("(TM)", "") \ + .replace("(tm)", "") + if 'cpu MHz' in cpuInfo: + self.cpu_max_frequency = cpuInfo['cpu MHz'].split('.')[0].strip() + ' MHz' + + # modern cpus may not work with full speed the whole day + # read the number from cpufreq and overwrite cpu_max_frequency from above + freqInfoFilename = '/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq' + if os.path.isfile(freqInfoFilename) and os.access(freqInfoFilename, os.R_OK): + frequencyInfoFile = open(freqInfoFilename, 'rt') + cpu_max_frequency = frequencyInfoFile.read().strip('\n') + frequencyInfoFile.close() + self.cpu_max_frequency = str(int(cpu_max_frequency) // 1000) + ' MHz' + + # get info about memory + memInfo = dict() + memInfoFilename = '/proc/meminfo' + if os.path.isfile(memInfoFilename) and os.access(memInfoFilename, os.R_OK): + memInfoFile = open(memInfoFilename, 'rt') + memInfo = dict(tuple(str.split(': ')) for str in + memInfoFile.read() + .replace('\t', '') + .strip('\n').split('\n')) + memInfoFile.close() + self.memory = memInfo.get('MemTotal', 'unknown').strip() \ No newline at end of file diff --git a/benchexec/tablegenerator/__init__.py b/benchexec/tablegenerator/__init__.py new file mode 100755 index 000000000..a1ab2bd61 --- /dev/null +++ b/benchexec/tablegenerator/__init__.py @@ -0,0 +1,1193 @@ +#!/usr/bin/env python + +""" +CPAchecker is a tool for configurable software verification. +This file is part of CPAchecker. + +Copyright (C) 2007-2014 Dirk Beyer +All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+
+
+CPAchecker web page:
+ http://cpachecker.sosy-lab.org
+"""
+
+# prepare for Python 3
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+import xml.etree.ElementTree as ET
+import collections
+import os.path
+import glob
+import json
+import argparse
+import re
+import subprocess
+import sys
+import time
+import tempita
+
+from decimal import Decimal, InvalidOperation
+
+import benchexec.result as result
+
+NAME_START = "results" # first part of filename of table
+
+DEFAULT_OUTPUT_PATH = "test/results/"
+
+LIB_URL = "http://www.sosy-lab.org/lib"
+LIB_URL_OFFLINE = "lib/javascript"
+
+TEMPLATE_FILE_NAME = os.path.join(os.path.dirname(__file__), 'template.{format}')
+TEMPLATE_FORMATS = ['html', 'csv']
+TEMPLATE_ENCODING = 'UTF-8'
+
+LOG_VALUE_EXTRACT_PATTERN = re.compile(': *([^ :]*)')
+
+DEFAULT_TIME_PRECISION = 3
+
+class Util:
+ """
+ This class contains some useful functions for strings, files, and lists.
+ """
+
+ @staticmethod
+ def get_file_list(shortFile):
+ """
+ The function get_file_list expands a short filename to a sorted list
+ of filenames. The short filename can contain variables and wildcards.
+ """
+
+ # expand tilde and variables
+ expandedFile = os.path.expandvars(os.path.expanduser(shortFile))
+
+ # expand wildcards
+ fileList = glob.glob(expandedFile)
+
+ # sort alphabetically;
+ # if the list is empty, warn the user instead
+ if len(fileList) != 0:
+ fileList.sort()
+ else:
+ print ('\nWarning: no file matches "{0}".'.format(shortFile))
+
+ return fileList
+
+
+ @staticmethod
+ def extend_file_list(filelist):
+ '''
+ This function takes a list of files, expands wildcards
+ and returns a new list of files.
+ '''
+ return [file for wildcardFile in filelist for file in Util.get_file_list(wildcardFile)]
+
+
+ @staticmethod
+ def split_number_and_unit(s):
+ """
+ Split a string into two parts: a number prefix and an arbitrary suffix.
+ Splitting is done from the end, so the split is where the last digit
+ in the string is (that means the prefix may include non-digit characters,
+ if they are followed by at least one digit).
+ """
+ if not s:
+ return (s, '')
+ pos = len(s)
+ while pos and not s[pos-1].isdigit():
+ pos -= 1
+ return (s[:pos], s[pos:])
+
+ @staticmethod
+ def remove_unit(s):
+ """
+ Remove a unit from a number string, or return the full string if it is not a number.
+ """
+ (prefix, suffix) = Util.split_number_and_unit(s)
+ return suffix if prefix == '' else prefix
+
+ @staticmethod
+ def format_number(s, number_of_digits):
+ """
+ If the value is a number (or a number plus one char),
+ this function returns a string representation of the number
+ with the given number of digits after the decimal separator.
+ If the number has more digits, it is rounded; otherwise zeros are added.
+
+ If the value is not a number, it is returned unchanged.
+ """
+ # if the number ends with "s" or another unit, remove it
+ value, suffix = Util.split_number_and_unit((str(s) or '').strip())
+ try:
+ floatValue = float(value)
+ return "{value:.{width}f}{suffix}".format(width=number_of_digits, value=floatValue, suffix=suffix)
+ except ValueError: # if value is not a float, don't format it
+ return s
+
+
+ @staticmethod
+ def format_value(value, column):
+ """
+ Format a value nicely for human-readable output (including rounding).
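+ For example, a value '1.23456' in a column whose title ends with 'time'
+ and that has no explicit number_of_digits is formatted with the default
+ time precision of 3 digits, yielding '1.235'.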
+ """ + if not value: + return '-' + + number_of_digits = column.number_of_digits + if number_of_digits is None and column.title.lower().endswith('time'): + number_of_digits = DEFAULT_TIME_PRECISION + + if number_of_digits is None: + return value + return Util.format_number(value, number_of_digits) + + + @staticmethod + def to_decimal(s): + # remove whitespaces and trailing units (e.g., in '1.23s') + s, _ = Util.split_number_and_unit((s or '').strip()) + return Decimal(s) if s else Decimal() + + + @staticmethod + def collapse_equal_values(values, counts): + """ + Take a tuple (values, counts), remove consecutive values and increment their count instead. + """ + assert len(values) == len(counts) + previousValue = values[0] + previousCount = 0 + + for value, count in zip(values, counts): + if value != previousValue: + yield (previousValue, previousCount) + previousCount = 0 + previousValue = value + previousCount += count + + yield (previousValue, previousCount) + + @staticmethod + def get_column_value(sourcefileTag, columnTitle, default=None): + for column in sourcefileTag.findall('column'): + if column.get('title') == columnTitle: + return column.get('value') + return default + + @staticmethod + def flatten(list): + return [value for sublist in list for value in sublist] + + @staticmethod + def json(obj): + return tempita.html(json.dumps(obj)) + + @staticmethod + def prettylist(list): + if not list: + return '' + + # Filter out duplicate values while keeping order + values = set() + uniqueList = [] + for entry in list: + if not entry in values: + values.add(entry) + uniqueList.append(entry) + + return uniqueList[0] if len(uniqueList) == 1 \ + else '[' + '; '.join(uniqueList) + ']' + +def parse_table_definition_file(file, all_columns): + ''' + This function parses the input to get run sets and columns. + The param 'file' is an XML file defining the result files and columns. + + If column titles are given in the XML file, + they will be searched in the result files. + If no title is given, all columns of the result file are taken. + + @return: a list of RunSetResult objects + ''' + print ("reading table definition from '{0}'...".format(file)) + if not os.path.isfile(file): + print ('File {0!r} does not exist.'.format(file)) + exit() + + def extract_columns_from_table_definition_file(xmltag): + """ + Extract all columns mentioned in the result tag of a table definition file. 
+ """ + return [Column(c.get("title"), c.text, c.get("numberOfDigits")) + for c in xmltag.findall('column')] + + runSetResults = [] + tableGenFile = ET.ElementTree().parse(file) + if 'table' != tableGenFile.tag: + print ("ERROR:\n" \ + + " The XML file seems to be invalid.\n" \ + + " The rootelement of table-definition-file is not named 'table'.") + exit() + + defaultColumnsToShow = extract_columns_from_table_definition_file(tableGenFile) + + base_dir = os.path.dirname(file) + + def getResultTags(rootTag): + tags = rootTag.findall('result') + if not tags: + tags = rootTag.findall('test') + if tags: + print("Warning: file {0} contains deprecated 'test' tags, rename them to 'result'".format(file)) + return tags + + for resultTag in getResultTags(tableGenFile): + columnsToShow = extract_columns_from_table_definition_file(resultTag) or defaultColumnsToShow + filelist = Util.get_file_list(os.path.join(base_dir, resultTag.get('filename'))) # expand wildcards + runSetResults += [RunSetResult.create_from_xml(resultsFile, parse_results_file(resultsFile), columnsToShow, all_columns) for resultsFile in filelist] + + for unionTag in tableGenFile.findall('union'): + columnsToShow = extract_columns_from_table_definition_file(unionTag) or defaultColumnsToShow + result = RunSetResult([], collections.defaultdict(list), columnsToShow) + + for resultTag in getResultTags(unionTag): + filelist = Util.get_file_list(os.path.join(base_dir, resultTag.get('filename'))) # expand wildcards + for resultsFile in filelist: + result.append(resultsFile, parse_results_file(resultsFile), all_columns) + + if result.filelist: + name = unionTag.get('title', unionTag.get('name')) + if name: + result.attributes['name'] = name + runSetResults.append(result) + + return runSetResults + + +class Column: + """ + The class Column contains title, pattern (to identify a line in log_file), + and number_of_digits of a column. + It does NOT contain the value of a column. + """ + def __init__(self, title, pattern, numOfDigits): + self.title = title + self.pattern = pattern + self.number_of_digits = numOfDigits + + +class RunSetResult(): + """ + The Class RunSetResult contains all the results of one execution of a run set: + the sourcefiles tags (with sourcefiles + values), the columns to show + and the benchmark attributes. + """ + def __init__(self, filelist, attributes, columns, summary={}): + self.filelist = filelist + self.attributes = attributes + self.columns = columns + self.summary = summary + + def get_sourcefile_names(self): + return [file.get('name') for file in self.filelist] + + def append(self, resultFile, resultElem, all_columns=False): + self.filelist += resultElem.findall('sourcefile') + for attrib, values in RunSetResult._extract_attributes_from_result(resultFile, resultElem).items(): + self.attributes[attrib].extend(values) + + if not self.columns: + self.columns = RunSetResult._extract_existing_columns_from_result(resultFile, resultElem, all_columns) + + @staticmethod + def create_from_xml(resultFile, resultElem, columns=None, all_columns=False): + ''' + This function extracts everything necessary for creating a RunSetResult object + from the "result" XML tag of a benchmark result file. + It returns a RunSetResult object. 
+ ''' + attributes = RunSetResult._extract_attributes_from_result(resultFile, resultElem) + + if not columns: + columns = RunSetResult._extract_existing_columns_from_result(resultFile, resultElem, all_columns) + + summary = RunSetResult._extract_summary_from_result(resultElem, columns) + + return RunSetResult(resultElem.findall('sourcefile'), + attributes, columns, summary) + + @staticmethod + def _extract_existing_columns_from_result(resultFile, resultElem, all_columns): + if resultElem.find('sourcefile') is None: + print("Empty resultfile found: " + resultFile) + return [] + else: # show all available columns + columnNames = set() + columns = [] + for s in resultElem.findall('sourcefile'): + for c in s.findall('column'): + title = c.get('title') + if not title in columnNames \ + and (all_columns or c.get('hidden') != 'true'): + columnNames.add(title) + columns.append(Column(title, None, None)) + return columns + + + @staticmethod + def _extract_attributes_from_result(resultFile, resultTag): + attributes = collections.defaultdict(list) + + # Defaults + attributes['name' ] = [resultTag.get('benchmarkname')] + attributes['branch'] = [os.path.basename(resultFile).split('#')[0] if '#' in resultFile else ''] + + # Update with real values + for attrib, value in resultTag.attrib.items(): + attributes[attrib] = [value] + + # Add system information if present + for systemTag in resultTag.findall('systeminfo'): + cpuTag = systemTag.find('cpu') + attributes['os' ].append(systemTag.find('os').get('name')) + attributes['cpu' ].append(cpuTag.get('model')) + attributes['cores'].append( cpuTag.get('cores')) + attributes['freq' ].append(cpuTag.get('frequency')) + attributes['ram' ].append(systemTag.find('ram').get('size')) + attributes['host' ].append(systemTag.get('hostname', 'unknown')) + + return attributes + + @staticmethod + def _extract_summary_from_result(resultTag, columns): + summary = collections.defaultdict(list) + + # Add summary for columns if present + for column in resultTag.findall('column'): + title = column.get('title') + if title in (c.title for c in columns): + summary[title] = column.get('value') + + return summary + + +def parse_results_file(resultFile): + ''' + This function parses a XML file with the results of the execution of a run set. + It returns the "result" XML tag + ''' + if not os.path.isfile(resultFile): + print ('File {0!r} is not found.'.format(resultFile)) + exit() + + print (' ' + resultFile) + + resultElem = ET.ElementTree().parse(resultFile) + + if resultElem.tag not in ['result', 'test']: + print (("ERROR:\n" \ + + "XML file with benchmark results seems to be invalid.\n" \ + + "The root element of the file is not named 'result' or 'test'.\n" \ + + "If you want to run a table-definition file,\n"\ + + "you should use the option '-x' or '--xml'.").replace('\n','\n ')) + exit() + + insert_logfile_names(resultFile, resultElem) + return resultElem + +def insert_logfile_names(resultFile, resultElem): + parts = os.path.basename(resultFile).split("#", 1) + + # get folder of logfiles + date = resultElem.get('date').replace(':','').replace(' ','_') # from ISO-format to filename-format + log_folder = resultElem.get('benchmarkname') + '.' 
+ date + '.logfiles/'
+ if len(parts) > 1:
+ log_folder = parts[0] + '#' + log_folder
+ log_folder = os.path.join(os.path.dirname(resultFile), resultElem.get('baseDir', ''), log_folder)
+
+ # append beginning of filename
+ runSetName = resultElem.get('name')
+ if runSetName is not None:
+ blockname = resultElem.get('block')
+ if blockname is None:
+ log_folder += runSetName + "."
+ elif blockname == runSetName:
+ pass # real runSetName is empty
+ else:
+ assert runSetName.endswith("." + blockname)
+ runSetName = runSetName[:-(1 + len(blockname))] # remove last chars
+ log_folder += runSetName + "."
+
+ # for each file: append original filename and insert log_file_name into sourcefileElement
+ for sourcefile in resultElem.findall('sourcefile'):
+ log_file_name = os.path.basename(sourcefile.get('name')) + ".log"
+ sourcefile.set('logfile', log_folder + log_file_name)
+
+def get_default_log_folder(resultElem):
+ # compute the base folder name in the same way as insert_logfile_names
+ date = resultElem.get('date').replace(':','').replace(' ','_') # from ISO format to filename format
+ return resultElem.get('benchmarkname') + '.' + date + '.logfiles/'
+
+
+def merge_sourcefiles(runSetResults):
+ """
+ This function merges the filelists of all RunSetResult objects.
+ If necessary, it can merge lists of names: [A,C] + [A,B] --> [A,B,C]
+ and add dummy elements to the filelists.
+ It also ensures the same order of files.
+ Returns a list of filenames.
+ """
+ nameList = []
+ nameSet = set()
+ for result in runSetResults:
+ index = -1
+ currentResultNameSet = set()
+ for name in result.get_sourcefile_names():
+ if name in currentResultNameSet:
+ print ("File {0} is present twice, skipping it.".format(name))
+ else:
+ currentResultNameSet.add(name)
+ if name not in nameSet:
+ nameList.insert(index+1, name)
+ nameSet.add(name)
+ index += 1
+ else:
+ index = nameList.index(name)
+
+ merge_file_lists(runSetResults, nameList)
+ return nameList
+
+def merge_file_lists(runSetResults, filenames):
+ """
+ Set the filelists of all RunSetResult elements so that they contain the same files
+ in the same order. For missing files a dummy element is inserted.
+ """
+ for result in runSetResults:
+ # create mapping from name to sourcefile tag
+ dic = dict([(file.get('name'), file) for file in result.filelist])
+ result.filelist = [] # clear and repopulate filelist
+ for filename in filenames:
+ fileResult = dic.get(filename)
+ if fileResult is None:
+ fileResult = ET.Element('sourcefile') # create an empty dummy element
+ fileResult.set('logfile', None)
+ fileResult.set('name', filename)
+ print (' no result for {0}'.format(filename))
+ result.filelist.append(fileResult)
+
+
+def find_common_sourcefiles(runSetResults):
+ filesInFirstRunSet = runSetResults[0].get_sourcefile_names()
+
+ fileSet = set(filesInFirstRunSet)
+ for result in runSetResults:
+ fileSet = fileSet & set(result.get_sourcefile_names())
+
+ fileList = []
+ if not fileSet:
+ print('No files are present in all benchmark results.')
+ else:
+ fileList = [file for file in filesInFirstRunSet if file in fileSet]
+ merge_file_lists(runSetResults, fileList)
+
+ return fileList
+
+
+class RunResult:
+ """
+ The class RunResult contains the results of a single verification run.
+ """
+ def __init__(self, status, category, log_file, columns, values):
+ assert(len(columns) == len(values))
+ self.status = status
+ self.log_file = log_file
+ self.columns = columns
+ self.values = values
+ self.category = category
+
+ @staticmethod
+ def create_from_xml(sourcefileTag, listOfColumns, correct_only):
+ '''
+ This function collects the values from one run.
+ Only columns that should be part of the table are collected.
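+ (If correct_only is set, values other than status and score are collected
+ only for runs with a positive score.)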
+ ''' + + def read_logfile_lines(logfilename): + if not logfilename: return [] + try: + with open(logfilename, 'rt') as logfile: + return logfile.readlines() + except IOError as e: + print('WARNING: Could not read value from logfile: {}'.format(e)) + return [] + + def get_value_from_logfile(lines, identifier): + """ + This method searches for values in lines of the content. + The format of such a line must be: "identifier: value (rest)". + + If a value is not found, the value is set to None. + """ + # stop after the first line, that contains the searched text + for line in lines: + if identifier in line: + match = LOG_VALUE_EXTRACT_PATTERN.search(line) + return match.group(1).strip() if match else None + return None + + status = Util.get_column_value(sourcefileTag, 'status', '') + category = Util.get_column_value(sourcefileTag, 'category', 'placeholderForUnknown') + + # fallback for compatibility, + # TODO: remove this block and set CATEGORY_UNKNOWN as default value + if status == 'false(label)': + status = 'false(reach)' + if category == 'placeholderForUnknown': + category = result.get_result_category(sourcefileTag.get('name'), status) + + score = result.calculate_score(category, status) + logfileLines = None + + values = [] + + for column in listOfColumns: # for all columns that should be shown + value = None # default value + if column.title.lower() == 'score': + value = str(score) + elif column.title.lower() == 'status': + value = status + + elif not correct_only or score > 0: + if not column.pattern: # collect values from XML + value = Util.get_column_value(sourcefileTag, column.title) + + else: # collect values from logfile + if logfileLines is None: # cache content + logfileLines = read_logfile_lines(sourcefileTag.get('logfile')) + + value = get_value_from_logfile(logfileLines, column.pattern) + + if column.number_of_digits is not None: + value = Util.format_number(value, column.number_of_digits) + + values.append(value) + + return RunResult(status, category, sourcefileTag.get('logfile'), listOfColumns, values) + + +class Row: + """ + The class Row contains all the results for one sourcefile (a list of RunResult instances). + It corresponds to one complete row in the final tables. + """ + def __init__(self, filename): + self.filename = filename + self.results = [] + + def add_run_result(self, runresult): + self.results.append(runresult) + + def set_relative_path(self, common_prefix, base_dir): + """ + generate output representation of rows + """ + # make path relative to directory of output file if necessary + self.file_path = self.filename if os.path.isabs(self.filename) \ + else os.path.relpath(self.filename, base_dir) + + self.short_filename = self.filename.replace(common_prefix, '', 1) + +def rows_to_columns(rows): + """ + Convert a list of Rows into a column-wise list of list of RunResult + """ + return zip(*[row.results for row in rows]) + + +def get_rows(runSetResults, filenames, correct_only): + """ + Create list of rows with all data. Each row consists of several RunResults. + """ + rows = [Row(filename) for filename in filenames] + + # get values for each run set + for result in runSetResults: + # get values for each file in a run set + for fileResult, row in zip(result.filelist, rows): + row.add_run_result(RunResult.create_from_xml(fileResult, result.columns, correct_only)) + + return rows + + +def filter_rows_with_differences(rows): + """ + Find all rows with differences in the status column. 
+ """ + if not rows: + # empty table + return [] + if len(rows[0].results) == 1: + # table with single column + return [] + + def all_equal_result(listOfResults): + allStatus = set([result.status for result in listOfResults if result.status]) + return len(allStatus) <= 1 + + rowsDiff = [row for row in rows if not all_equal_result(row.results)] + + if len(rowsDiff) == 0: + print ("---> NO DIFFERENCE FOUND IN COLUMN 'STATUS'") + elif len(rowsDiff) == len(rows): + print ("---> DIFFERENCES FOUND IN ALL ROWS, NO NEED TO CREATE DIFFERENCE TABLE") + return [] + + return rowsDiff + + + +def get_table_head(runSetResults, commonFileNamePrefix): + + # This list contains the number of columns each run set has + # (the width of a run set in the final table) + # It is used for calculating the column spans of the header cells. + runSetWidths = [len(runSetResult.columns) for runSetResult in runSetResults] + + for runSetResult in runSetResults: + # Ugly because this overwrites the entries in the map, + # but we don't need them anymore and this is the easiest way + for key in runSetResult.attributes: + runSetResult.attributes[key] = Util.prettylist(runSetResult.attributes[key]) + + def get_row(rowName, format, collapse=False, onlyIf=None, default='Unknown'): + def format_cell(attributes): + if onlyIf and not onlyIf in attributes: + formatStr = default + else: + formatStr = format + return formatStr.format(**attributes) + + values = [format_cell(runSetResult.attributes) for runSetResult in runSetResults] + if not any(values): return None # skip row without values completely + + valuesAndWidths = list(Util.collapse_equal_values(values, runSetWidths)) \ + if collapse else list(zip(values, runSetWidths)) + + return tempita.bunch(id=rowName.lower().split(' ')[0], + name=rowName, + content=valuesAndWidths) + + benchmarkNames = [runSetResult.attributes['benchmarkname'] for runSetResult in runSetResults] + allBenchmarkNamesEqual = benchmarkNames.count(benchmarkNames[0]) == len(benchmarkNames) + + titles = [column.title for runSetResult in runSetResults for column in runSetResult.columns] + runSetWidths1 = [1]*sum(runSetWidths) + titleRow = tempita.bunch(id='columnTitles', name=commonFileNamePrefix, + content=list(zip(titles, runSetWidths1))) + + return {'tool': get_row('Tool', '{tool} {version}', collapse=True), + 'limit': get_row('Limits', 'timelimit: {timelimit}, memlimit: {memlimit}, CPU core limit: {cpuCores}', collapse=True), + 'host': get_row('Host', '{host}', collapse=True, onlyIf='host'), + 'os': get_row('OS', '{os}', collapse=True, onlyIf='os'), + 'system': get_row('System', 'CPU: {cpu} with {cores} cores, frequency: {freq}; RAM: {ram}', collapse=True, onlyIf='cpu'), + 'date': get_row('Date of execution', '{date}', collapse=True), + 'runset': get_row('Run set', '{name}' if allBenchmarkNamesEqual else '{benchmarkname}.{name}'), + 'branch': get_row('Branch', '{branch}'), + 'options': get_row('Options', '{options}'), + 'property':get_row('Propertyfile', '{propertyfiles}', collapse=True, onlyIf='propertyfiles', default=''), + 'title': titleRow} + + +def get_stats(rows): + stats = [get_stats_of_run_set(runResults) for runResults in rows_to_columns(rows)] # column-wise + rowsForStats = list(map(Util.flatten, zip(*stats))) # row-wise + + return [tempita.bunch(default=None, title='total tasks', content=rowsForStats[0]), + tempita.bunch(default=None, title='correct results', description='(no bug exists + result is TRUE) OR (bug exists + result is FALSE)', content=rowsForStats[1]), + tempita.bunch(default=None, 
title='false negatives', description='bug exists + result is TRUE', content=rowsForStats[2]),
+ tempita.bunch(default=None, title='false positives', description='no bug exists + result is FALSE', content=rowsForStats[3]),
+ tempita.bunch(default=None, title='false properties', description='bug exists + bug found, but not searched for it', content=rowsForStats[4]),
+ #TODO re-enable "max score" when we have found a way to correctly calculate it
+ #tempita.bunch(default=None, title='score ({0} tasks, max score: {1})'.format(len(rows), maxScore), id='score', description='{0} true files, {1} false files'.format(countTrue, countFalse), content=rowsForStats[5])
+ tempita.bunch(default=None, title='score ({0} tasks)'.format(len(rows)), id='score', content=rowsForStats[5])
+ ]
+
+
+def get_stats_of_run_set(runResults):
+ """
+ This function returns the numbers of the statistics.
+ @param runResults: All the results of the execution of one run set (as list of RunResult objects)
+ """
+
+ # convert rows to columns, e.g.:
+ # [['TRUE', 0.1], ['FALSE', 0.2]] --> [('TRUE', 'FALSE'), (0.1, 0.2)]
+ # In Python 2 zip returns a list, in Python 3 an iterator;
+ # this works because we iterate over it exactly once, some lines below.
+ listsOfValues = zip(*[runResult.values for runResult in runResults])
+
+ columns = runResults[0].columns
+ statusList = [(runResult.category, runResult.status) for runResult in runResults]
+
+ # collect some statistics
+ totalRow = []
+ correctRow = []
+ wrongTrueRow = []
+ wrongFalseRow = []
+ wrongPropertyRow = []
+ scoreRow = []
+
+ for column, values in zip(columns, listsOfValues):
+ if column.title == 'status':
+ countCorrectTrue, countCorrectFalse, countCorrectProperty, countWrongTrue, countWrongFalse, countWrongProperty, countMissing = get_category_count(statusList)
+
+ total = StatValue(len([status for (category, status) in statusList if status]))
+ correct = StatValue(countCorrectTrue + countCorrectFalse + countCorrectProperty)
+ score = StatValue(result.SCORE_CORRECT_TRUE * countCorrectTrue + \
+ result.SCORE_CORRECT_FALSE * countCorrectFalse + \
+ result.SCORE_CORRECT_FALSE * countCorrectProperty + \
+ result.SCORE_WRONG_TRUE * countWrongTrue + \
+ result.SCORE_WRONG_FALSE * countWrongFalse + \
+ result.SCORE_WRONG_FALSE * countWrongProperty,
+ )
+ wrongTrue = StatValue(countWrongTrue)
+ wrongFalse = StatValue(countWrongFalse)
+ wrongProperty = StatValue(countWrongProperty)
+
+ else:
+ total, correct, wrongTrue, wrongFalse, wrongProperty = get_stats_of_number_column(values, statusList, column.title)
+ score = ''
+
+ if (total.sum, correct.sum, wrongTrue.sum, wrongFalse.sum) == (0,0,0,0):
+ (total, correct, wrongTrue, wrongFalse) = (None, None, None, None)
+
+ totalRow.append(total)
+ correctRow.append(correct)
+ wrongTrueRow.append(wrongTrue)
+ wrongFalseRow.append(wrongFalse)
+ wrongPropertyRow.append(wrongProperty)
+ scoreRow.append(score)
+
+ def replace_irrelevant(row):
+ count = row[0]
+ if not count or not count.sum:
+ for i in range(1, len(row)):
+ row[i] = None
+
+ replace_irrelevant(totalRow)
+ replace_irrelevant(correctRow)
+ replace_irrelevant(wrongTrueRow)
+ replace_irrelevant(wrongFalseRow)
+ replace_irrelevant(wrongPropertyRow)
+ replace_irrelevant(scoreRow)
+
+ return (totalRow, correctRow, wrongTrueRow, wrongFalseRow, wrongPropertyRow, scoreRow)
+
+
+class StatValue:
+ def __init__(self, sum, min=None, max=None, avg=None, median=None):
+ self.sum = sum
+ self.min = min
+ self.max = max
+ self.avg = avg
+ self.median = median
+
+ def __str__(self):
+ return str(self.sum)
+
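+ # Example: StatValue.from_list([Decimal(1), Decimal(3)]) yields
+ # sum=4, min=1, max=3, avg=2.0, median=3 (the upper median).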
@classmethod + def from_list(cls, values): + if not values: + return StatValue(0) + + return StatValue(sum(values), + min = min(values), + max = max(values), + avg = float("{:.3f}".format(sum(values) / len(values))), + median = sorted(values)[len(values)//2], + ) + + +def get_category_count(categoryList): + # count different elems in statusList + counts = collections.defaultdict(int) + for category in categoryList: + counts[category] += 1 + + # warning: read next lines carefully, there are some brackets and commas! + return ( + # correctTrue, correctFalseLabel, correctProperty + counts[result.CATEGORY_CORRECT, result.STATUS_TRUE_PROP], + counts[result.CATEGORY_CORRECT, result.STATUS_FALSE_REACH], + counts[result.CATEGORY_CORRECT, result.STATUS_FALSE_DEREF] \ + + counts[result.CATEGORY_CORRECT, result.STATUS_FALSE_FREE] \ + + counts[result.CATEGORY_CORRECT, result.STATUS_FALSE_MEMTRACK] \ + + counts[result.CATEGORY_CORRECT, result.STATUS_FALSE_TERMINATION], + + # wrongTrue, wrongFalseLabel, wrongProperty + counts[result.CATEGORY_WRONG, result.STATUS_TRUE_PROP], + counts[result.CATEGORY_WRONG, result.STATUS_FALSE_REACH], + counts[result.CATEGORY_WRONG, result.STATUS_FALSE_DEREF] \ + + counts[result.CATEGORY_WRONG, result.STATUS_FALSE_FREE] \ + + counts[result.CATEGORY_WRONG, result.STATUS_FALSE_MEMTRACK] \ + + counts[result.CATEGORY_WRONG, result.STATUS_FALSE_TERMINATION], + + # missing + counts[result.CATEGORY_MISSING, result.STATUS_TRUE_PROP] \ + + counts[result.CATEGORY_MISSING, result.STATUS_FALSE_REACH] \ + + counts[result.CATEGORY_MISSING, result.STATUS_FALSE_DEREF] \ + + counts[result.CATEGORY_MISSING, result.STATUS_FALSE_FREE] \ + + counts[result.CATEGORY_MISSING, result.STATUS_FALSE_MEMTRACK] \ + + counts[result.CATEGORY_MISSING, result.STATUS_FALSE_TERMINATION] \ + ) + + +def get_stats_of_number_column(values, categoryList, columnTitle): + assert len(values) == len(categoryList) + try: + valueList = [Util.to_decimal(v) for v in values] + except InvalidOperation as e: + if columnTitle != "host": # we ignore values of column host, used in cloud-mode + print("Warning: {0}. 
Statistics may be wrong.".format(e))
+ return (StatValue(0), StatValue(0), StatValue(0), StatValue(0), StatValue(0))
+
+ valuesPerCategory = collections.defaultdict(list)
+ for value, catStat in zip(valueList, categoryList):
+ category, status = catStat
+ if category == result.CATEGORY_CORRECT:
+ status = None # ignore status, we do not need it, use None as DUMMY
+ valuesPerCategory[category, status].append(value)
+
+ return (StatValue.from_list(valueList),
+ StatValue.from_list(valuesPerCategory[result.CATEGORY_CORRECT, None]), # None as DUMMY
+ StatValue.from_list(valuesPerCategory[result.CATEGORY_WRONG, result.STATUS_TRUE_PROP]),
+ StatValue.from_list(valuesPerCategory[result.CATEGORY_WRONG, result.STATUS_FALSE_REACH]),
+ StatValue.from_list(valuesPerCategory[result.CATEGORY_WRONG, result.STATUS_FALSE_DEREF] +
+ valuesPerCategory[result.CATEGORY_WRONG, result.STATUS_FALSE_FREE] +
+ valuesPerCategory[result.CATEGORY_WRONG, result.STATUS_FALSE_MEMTRACK] +
+ valuesPerCategory[result.CATEGORY_WRONG, result.STATUS_FALSE_TERMINATION]
+ ),
+ )
+
+
+def get_regression_count(rows, ignoreFlappingTimeouts): # for options.dump_counts
+
+ columns = list(rows_to_columns(rows)) # materialize, because rows_to_columns returns an iterator in Python 3
+ if len(columns) < 2:
+ return 0 # no regressions with only one run
+
+ timeouts = set()
+ for runResults in columns[:-1]:
+ timeouts |= set(index for (index, runResult) in enumerate(runResults) if runResult.status == 'TIMEOUT')
+
+ def is_flapping_timeout(index, oldResult, newResult):
+ return index in timeouts \
+ and oldResult.status != 'TIMEOUT' \
+ and newResult.status == 'TIMEOUT'
+
+ def ignore_regression(oldResult, newResult):
+ return oldResult.status == 'TIMEOUT' and newResult.status == 'OUT OF MEMORY' \
+ or oldResult.status == 'OUT OF MEMORY' and newResult.status == 'TIMEOUT'
+
+ regressions = 0
+ for index, (oldResult, newResult) in enumerate(zip(columns[-2], columns[-1])):
+ # a regression is possible only if the result differs and the new result is not correct
+ if oldResult.status != newResult.status and newResult.category != result.CATEGORY_CORRECT:
+
+ if not (ignoreFlappingTimeouts and is_flapping_timeout(index, oldResult, newResult)) \
+ and not ignore_regression(oldResult, newResult):
+ regressions += 1
+ return regressions
+
+
+def get_counts(rows): # for options.dump_counts
+ countsList = []
+
+ for runResults in rows_to_columns(rows):
+ statusList = [(runResult.category, runResult.status) for runResult in runResults]
+ correctTrue, correctFalse, correctProperty, wrongTrue, wrongFalse, wrongProperty, missing = get_category_count(statusList)
+
+ correct = correctTrue + correctFalse + correctProperty
+ wrong = wrongTrue + wrongFalse + wrongProperty
+ unknown = len(statusList) - correct - wrong - missing
+
+ countsList.append((correct, wrong, unknown))
+
+ return countsList
+
+
+def get_summary(runSetResults):
+ summaryStats = []
+ available = False
+ for runSetResult in runSetResults:
+ for column in runSetResult.columns:
+ if column.title in runSetResult.summary and runSetResult.summary[column.title] != '':
+ available = True
+ value = runSetResult.summary[column.title]
+ else:
+ value = ''
+ summaryStats.append(StatValue(value))
+
+ if available:
+ return tempita.bunch(default=None, title='local summary',
+ description='(This line contains some statistics from local execution.
Only trust those values, if you use your own computer.)', + content=summaryStats) + else: + return None + + +def create_tables(name, runSetResults, filenames, rows, rowsDiff, outputPath, outputFilePattern, options): + ''' + create tables and write them to files + ''' + + # get common folder of sourcefiles + common_prefix = os.path.commonprefix(filenames) # maybe with parts of filename + common_prefix = common_prefix[: common_prefix.rfind('/') + 1] # only foldername + list(map(lambda row: Row.set_relative_path(row, common_prefix, outputPath), rows)) + + head = get_table_head(runSetResults, common_prefix) + run_sets_data = [runSetResult.attributes for runSetResult in runSetResults] + run_sets_columns = [[column for column in runSet.columns] for runSet in runSetResults] + run_sets_column_titles = [[column.title for column in runSet.columns] for runSet in runSetResults] + + templateNamespace={'flatten': Util.flatten, + 'json': Util.json, + 'relpath': os.path.relpath, + 'format_value': Util.format_value, + 'split_number_and_unit': Util.split_number_and_unit, + 'remove_unit': Util.remove_unit, + } + + def write_table(type, title, rows): + stats = get_stats(rows) + + summary = get_summary(runSetResults) + if summary and type != 'diff' and not options.correct_only and not options.common: + stats.insert(1, summary) + + for format in TEMPLATE_FORMATS: + outfile = os.path.join(outputPath, outputFilePattern.format(name=name, type=type, ext=format)) + print ('writing {0} into {1} ...'.format(format.upper().ljust(4), outfile)) + + # read template + Template = tempita.HTMLTemplate if format == 'html' else tempita.Template + template = Template.from_filename(TEMPLATE_FILE_NAME.format(format=format), + namespace=templateNamespace, + encoding=TEMPLATE_ENCODING) + + # write file + with open(outfile, 'w') as file: + file.write(template.substitute( + title=title, + head=head, + body=rows, + foot=stats, + run_sets=run_sets_data, + columns=run_sets_columns, + columnTitles=run_sets_column_titles, + lib_url=options.lib_url, + base_dir=outputPath, + )) + + if options.show_table and format == 'html': + try: + with open(os.devnull, 'w') as devnull: + subprocess.Popen(['xdg-open', outfile], + stdout=devnull, stderr=devnull) + except OSError: + pass + + # write normal tables + write_table("table", name, rows) + + # write difference tables + if rowsDiff: + write_table("diff", name + " differences", rowsDiff) + +def basename_without_ending(file): + name = os.path.basename(file) + if name.endswith(".xml"): + name = name[:-4] + return name + +def main(args=None): + + if args is None: + args = sys.argv + + parser = argparse.ArgumentParser( + description="""Create table with the results of one or more benchmark executions. + Documented example files for the table definitions can be found in 'doc/examples'\n""" + ) + + parser.add_argument("tables", + metavar="RESULT", + type=str, + nargs='*', + help="XML file with the results from the benchmark script" + ) + parser.add_argument("-x", "--xml", + action="store", + type=str, + dest="xmltablefile", + help="XML file with the table definition." + ) + parser.add_argument("-o", "--outputpath", + action="store", + type=str, + dest="outputPath", + help="Output path for the tables." + ) + parser.add_argument("-n", "--name", + action="store", + type=str, + dest="output_name", + help="Base name of the created output files." 
+ )
+ parser.add_argument("--ignore-erroneous-benchmarks",
+ action="store_true",
+ dest="ignore_errors",
+ help="Ignore results where there was an error during benchmarking."
+ )
+ parser.add_argument("-d", "--dump",
+ action="store_true", dest="dump_counts",
+ help="Print summary statistics for regressions and the good, bad, and unknown counts."
+ )
+ parser.add_argument("--ignore-flapping-timeout-regressions",
+ action="store_true", dest="ignoreFlappingTimeouts",
+ help="For the regression-count statistics, do not count regressions to timeouts if the file already had timeouts before."
+ )
+ parser.add_argument("-c", "--common",
+ action="store_true", dest="common",
+ help="Put only sourcefiles into the table for which all benchmarks contain results."
+ )
+ parser.add_argument("--no-diff",
+ action="store_false", dest="write_diff_table",
+ help="Do not output a table with result differences between benchmarks."
+ )
+ parser.add_argument("--correct-only",
+ action="store_true", dest="correct_only",
+ help="Clear all results (e.g., time) in cases where the result was not correct."
+ )
+ parser.add_argument("--all-columns",
+ action="store_true", dest="all_columns",
+ help="Show all columns in tables, including those that are normally hidden."
+ )
+ parser.add_argument("--offline",
+ action="store_const", dest="lib_url",
+ const=LIB_URL_OFFLINE,
+ default=LIB_URL,
+ help="Don't insert links to http://www.sosy-lab.org, instead expect the JS libraries in lib/javascript."
+ )
+ parser.add_argument("--show",
+ action="store_true", dest="show_table",
+ help="Open the produced HTML table(s) in the default browser."
+ )
+
+ options = parser.parse_args(args[1:])
+
+ name = options.output_name
+ outputPath = options.outputPath
+ outputFilePattern = "{name}.{type}.{ext}"
+
+ if options.xmltablefile:
+ if options.tables:
+ print ("Invalid additional arguments '{}'".format(" ".join(options.tables)))
+ exit()
+ runSetResults = parse_table_definition_file(options.xmltablefile, options.all_columns)
+ if not name:
+ name = basename_without_ending(options.xmltablefile)
+
+ if not outputPath:
+ outputPath = os.path.dirname(options.xmltablefile)
+
+ else:
+ if options.tables:
+ inputFiles = options.tables
+ else:
+ searchDir = outputPath or DEFAULT_OUTPUT_PATH
+ print ("searching result files in '{}'...".format(searchDir))
+ inputFiles = [os.path.join(searchDir, '*.results*.xml')]
+
+ inputFiles = Util.extend_file_list(inputFiles) # expand wildcards
+ runSetResults = [RunSetResult.create_from_xml(file, parse_results_file(file), all_columns=options.all_columns) for file in inputFiles]
+
+ if len(inputFiles) == 1:
+ if not name:
+ name = basename_without_ending(inputFiles[0])
+ outputFilePattern = "{name}.{ext}"
+ else:
+ if not name:
+ name = NAME_START + "." + time.strftime("%y-%m-%d_%H%M", time.localtime())
+
+ if inputFiles and not outputPath:
+ dir = os.path.dirname(inputFiles[0])
+ if all(dir == os.path.dirname(file) for file in inputFiles):
+ outputPath = dir
+ else:
+ outputPath = DEFAULT_OUTPUT_PATH
+
+ if not outputPath:
+ outputPath = '.'
+
+ if options.ignore_errors:
+ filteredRunSets = []
+ for runSet in runSetResults:
+ if 'error' in runSet.attributes:
+ print('Ignoring benchmark {0} because of error: {1}'
+ .format(", ".join(set(runSet.attributes['name'])),
+ ", ".join(set(runSet.attributes['error']))))
+ else:
+ filteredRunSets.append(runSet)
+ runSetResults = filteredRunSets
+
+ if not runSetResults:
+ print ('\nError!
No benchmark results found.') + exit() + + print ('merging results ...') + if options.common: + filenames = find_common_sourcefiles(runSetResults) + else: + # merge list of run sets, so that all run sets contain the same filenames + filenames = merge_sourcefiles(runSetResults) + + # collect data and find out rows with differences + print ('collecting data ...') + rows = get_rows(runSetResults, filenames, options.correct_only) + if not rows: + print ('Warning: No results found, no tables produced.') + sys.exit() + + rowsDiff = filter_rows_with_differences(rows) if options.write_diff_table else [] + + print ('generating table ...') + if not os.path.isdir(outputPath): os.makedirs(outputPath) + create_tables(name, runSetResults, filenames, rows, rowsDiff, outputPath, outputFilePattern, options) + + print ('done') + + if options.dump_counts: # print some stats for Buildbot + print ("REGRESSIONS {}".format(get_regression_count(rows, options.ignoreFlappingTimeouts))) + countsList = get_counts(rows) + print ("STATS") + for counts in countsList: + print (" ".join(str(e) for e in counts)) + + +if __name__ == '__main__': + try: + sys.exit(main()) + except KeyboardInterrupt: + print ('script was interrupted by user') + pass diff --git a/benchexec/tablegenerator/template.csv b/benchexec/tablegenerator/template.csv new file mode 100644 index 000000000..098f1e16c --- /dev/null +++ b/benchexec/tablegenerator/template.csv @@ -0,0 +1,8 @@ +{{for line in ['tool', 'runset', 'title']}} +{{if line in head and head[line]}} +{{head[line].name.lower()}} {{flatten([[value]*width for value, width in head[line].content]) | '\t'.join}} +{{endif}} +{{endfor}} +{{for line in body}} +{{line.short_filename}} {{[remove_unit(value or '') for runResult in line.results for value in runResult.values] | '\t'.join}} +{{endfor}} diff --git a/benchexec/tablegenerator/template.html b/benchexec/tablegenerator/template.html new file mode 100644 index 000000000..9f1044e6e --- /dev/null +++ b/benchexec/tablegenerator/template.html @@ -0,0 +1,1318 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +{{title}} + + + + + +
+
+ + + +{{py: +def format_options(str): + '''Helper function for formatting the content of the options line''' + optionsAsList = html_quote(' ' + str).split(' -') + optionsAsList = [' -' + option + '' for option in optionsAsList[1:]] + return html(''.join(optionsAsList)) +}} +{{for lineName in ['tool', 'limit', 'host', 'os', 'system', 'date', 'runset', 'branch', 'options', 'property', 'title']}} + {{if lineName in head and head[lineName]}} + {{py:line = head[lineName]}} + + + {{for cell, width in line.content}} + + {{endfor}} + + {{endif}} +{{endfor}} + + + +{{for line in body}} + + {{for runResult in line.results}} + {{for column, value in zip(runResult.columns, runResult.values)}} + {{if column.title == 'status'}} + {{if runResult.log_file}} + + {{else}} + + {{endif}} + {{else}} + + {{endif}} + {{endfor}} + {{endfor}} + +{{endfor}} + + + +{{py: +def format_stat_title(stat, column, format_value): + '''Helper function to create the tooltip of a status cell containing average etc.''' + if not stat or not stat.avg: + return None + values = {k: format_value(v, column) for k, v in stat.__dict__.items()} + return 'Min: {min}, Max: {max}, Average: {avg}, Median: {median}'.format(**values) +}} +{{py:line = head['title']}} + + + {{for cell, width in line.content}} + + {{endfor}} + +{{for line in foot}} + + + {{for cell, column in zip(line.content, flatten(columns))}} + + {{endfor}} + +{{endfor}} +{{py:line = head['runset']}} + + + {{for cell, width in line.content}} + + {{endfor}} + + +
{{line.name}}{{format_options(cell) if line.id == 'options' else cell}}
{{line.short_filename}}{{(value or '-').lower()}}{{(value or '-').lower()}}{{format_value(value, column)}}
{{line.name}}{{cell}}
{{line.title}}{{format_value(cell, column)}}
{{line.name}}{{cell}}
+ + + + diff --git a/benchexec/test_core_assignment.py b/benchexec/test_core_assignment.py new file mode 100644 index 000000000..48ca25f3e --- /dev/null +++ b/benchexec/test_core_assignment.py @@ -0,0 +1,280 @@ +""" +CPAchecker is a tool for configurable software verification. +This file is part of CPAchecker. + +Copyright (C) 2007-2014 Dirk Beyer +All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + + +CPAchecker web page: + http://cpachecker.sosy-lab.org +""" + +# prepare for Python 3 +from __future__ import absolute_import, division, print_function, unicode_literals + +import sys +sys.dont_write_bytecode = True # prevent creation of .pyc files + +import itertools +import logging +import unittest + +from .localexecution import _get_cpu_cores_per_run0 + + +class TestCpuCoresPerRun(unittest.TestCase): + + @classmethod + def setUpClass(cls): + cls.longMessage = True + logging.disable(logging.CRITICAL) + + def assertValid(self, coreLimit, num_of_threads, expectedResult=None): + result = _get_cpu_cores_per_run0(coreLimit, num_of_threads, *self.machine()) + if expectedResult: + self.assertEqual(expectedResult, result, "Incorrect result for {} cores and {} threads.".format(coreLimit, num_of_threads)) + + def assertInvalid(self, coreLimit, num_of_threads): + self.assertRaises(SystemExit, _get_cpu_cores_per_run0, coreLimit, num_of_threads, *self.machine()) + + def machine(self): + """Create the necessary parameters of _get_cpu_cores_per_run0 for a specific machine.""" + core_count = self.cpus * self.cores + allCpus = range(core_count) + cores_of_package = {} + ht_spread = core_count // 2 + for package in xrange(self.cpus): + start = package * self.cores // (2 if self.ht else 1) + end = (package+1) * self.cores // (2 if self.ht else 1) + cores_of_package[package] = range(start, end) + if self.ht: + cores_of_package[package].extend(xrange(start + ht_spread, end + ht_spread)) + siblings_of_core = {} + for core in allCpus: + siblings_of_core[core] = [core] + if self.ht: + for core in allCpus: + siblings_of_core[core].append((core + ht_spread) % core_count) + siblings_of_core[core].sort() + return (allCpus, cores_of_package, siblings_of_core) + + + def test_singleThread(self): + # test all possible coreLimits for a single thread + core_count = self.cpus * self.cores + if self.ht: + # Creates list alternating between real core and hyper-threading core + singleThread_assignment = list(itertools.chain(*zip(range(core_count // 2), range(core_count // 2, core_count)))) + else: + singleThread_assignment = range(core_count) + for coreLimit in xrange(1, core_count + 1): + self.assertValid(coreLimit, 1, [sorted(singleThread_assignment[:coreLimit])]) + self.assertInvalid(core_count + 1, 1) + + # expected order in which cores are used for runs with coreLimit==1/2/3/4/8, used by the following tests + # these fields should be filled in by subclasses to activate the corresponding tests + # (same format as the expected return value by _get_cpu_cores_per_run) + oneCore_assignment = None + twoCore_assignment = None + 
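+    # For illustration, using the values from TestCpuCoresPerRun_singleCPU below:
+    # on one 8-core CPU without hyper-threading, oneCore_assignment is
+    # [[0], [1], ..., [7]] and twoCore_assignment is [[0, 1], [2, 3], [4, 5], [6, 7]],
+    # i.e. one inner list of core numbers per parallel run.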
threeCore_assignment = None + fourCore_assignment = None + eightCore_assignment = None + + def test_oneCorePerRun(self): + # test all possible numOfThread values for runs with one core + maxThreads = self.cpus * self.cores + self.assertInvalid(1, maxThreads + 1) + if not self.oneCore_assignment: + self.skipTest("Need result specified") + for num_of_threads in xrange(1, maxThreads + 1): + self.assertValid(1, num_of_threads, self.oneCore_assignment[:num_of_threads]) + + def test_twoCoresPerRun(self): + # test all possible numOfThread values for runs with two cores + maxThreads = self.cpus * (self.cores // 2) + self.assertInvalid(2, maxThreads + 1) + if not self.twoCore_assignment: + self.skipTest("Need result specified") + for num_of_threads in xrange(1, maxThreads + 1): + self.assertValid(2, num_of_threads, self.twoCore_assignment[:num_of_threads]) + + def test_threeCoresPerRun(self): + # test all possible numOfThread values for runs with three cores + maxThreads = self.cpus * (self.cores // 3) + self.assertInvalid(3, maxThreads + 1) + if not self.threeCore_assignment: + self.skipTest("Need result specified") + for num_of_threads in xrange(1, maxThreads + 1): + self.assertValid(3, num_of_threads, self.threeCore_assignment[:num_of_threads]) + + def test_fourCoresPerRun(self): + # test all possible numOfThread values for runs with four cores + maxThreads = self.cpus * (self.cores // 4) + self.assertInvalid(4, maxThreads + 1) + if not self.fourCore_assignment: + self.skipTest("Need result specified") + for num_of_threads in xrange(1, maxThreads + 1): + self.assertValid(4, num_of_threads, self.fourCore_assignment[:num_of_threads]) + + def test_eightCoresPerRun(self): + # test all possible numOfThread values for runs with eight cores + maxThreads = self.cpus * (self.cores // 8) + if not maxThreads: + self.skipTest("Testing for runs that need to be split across CPUs is not implemented") + self.assertInvalid(8, maxThreads + 1) + if not self.eightCore_assignment: + self.skipTest("Need result specified") + for num_of_threads in xrange(1, maxThreads + 1): + self.assertValid(8, num_of_threads, self.eightCore_assignment[:num_of_threads]) + + +class TestCpuCoresPerRun_singleCPU(TestCpuCoresPerRun): + cpus = 1 + cores = 8 + ht = False + + oneCore_assignment = map(lambda x: [x], range(8)) + twoCore_assignment = [[0, 1], [2, 3], [4, 5], [6, 7]] + threeCore_assignment = [[0, 1, 2], [3, 4, 5]] + fourCore_assignment = [[0, 1, 2, 3], [4, 5, 6, 7]] + eightCore_assignment = [range(8)] + + def test_singleCPU_invalid(self): + self.assertInvalid(2, 5) + self.assertInvalid(5, 2) + self.assertInvalid(3, 3) + + +class TestCpuCoresPerRun_singleCPU_HT(TestCpuCoresPerRun_singleCPU): + ht = True + + twoCore_assignment = [[0, 4], [1, 5], [2, 6], [3, 7]] + threeCore_assignment = [[0, 1, 4], [2, 3, 6]] + fourCore_assignment = [[0, 1, 4, 5], [2, 3, 6, 7]] + + +class TestCpuCoresPerRun_dualCPU_HT(TestCpuCoresPerRun): + cpus = 2 + cores = 16 + ht = True + + oneCore_assignment = map(lambda x: [x], [0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, 16, 24, 17, 25, 18, 26, 19, 27, 20, 28, 21, 29, 22, 30, 23, 31]) + + twoCore_assignment = [[0, 16], [8, 24], [1, 17], [9, 25], [2, 18], [10, 26], [3, 19], [11, 27], [4, 20], [12, 28], [5, 21], [13, 29], [6, 22], [14, 30], [7, 23], [15, 31]] + + # Note: the core assignment here is non-uniform, the last two threads are spread over three physical cores + # Currently, the assignment algorithm cannot do better for odd coreLimits, + # but this affects only cases where physical cores are 
split between runs, which is not recommended anyway. + threeCore_assignment = [[0, 1, 16], [8, 9, 24], [2, 3, 18], [10, 11, 26], [4, 5, 20], [12, 13, 28], [6, 7, 22], [14, 15, 30], [17, 19, 21], [25, 27, 29]] + + fourCore_assignment = [[0, 1, 16, 17], [8, 9, 24, 25], [2, 3, 18, 19], [10, 11, 26, 27], [4, 5, 20, 21], [12, 13, 28, 29], [6, 7, 22, 23], [14, 15, 30, 31]] + + eightCore_assignment = [[0, 1, 2, 3, 16, 17, 18, 19], [8, 9, 10, 11, 24, 25, 26, 27], [4, 5, 6, 7, 20, 21, 22, 23], [12, 13, 14, 15, 28, 29, 30, 31]] + + def test_dualCPU_HT(self): + self.assertValid(16, 2, [range(0, 8) + range(16, 24), range(8, 16) + range(24, 32)]) + + def test_dualCPU_HT_invalid(self): + self.assertInvalid(2, 17) + self.assertInvalid(17, 2) + self.assertInvalid(4, 9) + self.assertInvalid(9, 4) + self.assertInvalid(8, 5) + self.assertInvalid(5, 8) + + +class TestCpuCoresPerRun_threeCPU(TestCpuCoresPerRun): + cpus = 3 + cores = 5 + ht = False + + oneCore_assignment = map(lambda x: [x], [0, 5, 10, 1, 6, 11, 2, 7, 12, 3, 8, 13, 4, 9, 14]) + twoCore_assignment = [[0, 1], [5, 6], [10, 11], [2, 3], [7, 8], [12, 13]] + threeCore_assignment = [[0, 1, 2], [5, 6, 7], [10, 11, 12]] + fourCore_assignment = [[0, 1, 2, 3], [5, 6, 7, 8], [10, 11, 12, 13]] + + def test_threeCPU_invalid(self): + self.assertInvalid(6, 2) + +class TestCpuCoresPerRun_threeCPU_HT(TestCpuCoresPerRun): + cpus = 3 + cores = 10 + ht = True + + oneCore_assignment = map(lambda x: [x], [0, 5, 10, 1, 6, 11, 2, 7, 12, 3, 8, 13, 4, 9, 14, 15, 20, 25, 16, 21, 26, 17, 22, 27, 18, 23, 28, 19, 24, 29]) + twoCore_assignment = [[0, 15], [5, 20], [10, 25], [1, 16], [6, 21], [11, 26], [2, 17], [7, 22], [12, 27], [3, 18], [8, 23], [13, 28], [4, 19], [9, 24], [14, 29]] + threeCore_assignment = [[0, 1, 15], [5, 6, 20], [10, 11, 25], [2, 3, 17], [7, 8, 22], [12, 13, 27], [4, 16, 19], [9, 21, 24], [14, 26, 29]] + fourCore_assignment = [[0, 1, 15, 16], [5, 6, 20, 21], [10, 11, 25, 26], [2, 3, 17, 18], [7, 8, 22, 23], [12, 13, 27, 28]] + eightCore_assignment = [[0, 1, 2, 3, 15, 16, 17, 18], [5, 6, 7, 8, 20, 21, 22, 23], [10, 11, 12, 13, 25, 26, 27, 28]] + + def test_threeCPU_HT_invalid(self): + self.assertInvalid(11, 2) + +class TestCpuCoresPerRun_quadCPU_HT(TestCpuCoresPerRun): + cpus = 4 + cores = 16 + ht = True + + def test_quadCPU_HT(self): + self.assertValid(16, 4, [range(0, 8) + range(32, 40), range(8, 16) + range(40, 48), range(16, 24) + range(48, 56), range(24, 32) + range(56, 64)]) + + # Just test that no exception occurs + self.assertValid(1, 64) + self.assertValid(64, 1) + self.assertValid(2, 32) + self.assertValid(32, 2) + self.assertValid(3, 20) + self.assertValid(16, 3) + self.assertValid(4, 16) + self.assertValid(16, 4) + self.assertValid(5, 12) + self.assertValid(8, 8) + + def test_quadCPU_HT_invalid(self): + self.assertInvalid(2, 33) + self.assertInvalid(33, 2) + self.assertInvalid(3, 21) + self.assertInvalid(17, 3) + self.assertInvalid(4, 17) + self.assertInvalid(17, 4) + self.assertInvalid(5, 13) + self.assertInvalid(9, 5) + self.assertInvalid(6, 9) + self.assertInvalid(9, 6) + self.assertInvalid(7, 9) + self.assertInvalid(9, 7) + self.assertInvalid(8, 9) + self.assertInvalid(9, 8) + + self.assertInvalid(9, 5) + self.assertInvalid(6, 9) + self.assertInvalid(10, 5) + self.assertInvalid(6, 10) + self.assertInvalid(11, 5) + self.assertInvalid(6, 11) + self.assertInvalid(12, 5) + self.assertInvalid(6, 12) + self.assertInvalid(13, 5) + self.assertInvalid(5, 13) + self.assertInvalid(14, 5) + self.assertInvalid(5, 14) + self.assertInvalid(15, 5) + 
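+        # The combinations above are rejected: either they need more than the
+        # 4 * 16 = 64 virtual cores of this machine, or (as with 3 cores and
+        # 21 threads) they could only be scheduled by splitting physical cores
+        # or CPUs between runs, which the assignment algorithm refuses to do.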
self.assertInvalid(5, 15) + self.assertInvalid(16, 5) + self.assertInvalid(5, 16) + +# prevent execution of base class as its own test +del(TestCpuCoresPerRun) diff --git a/benchexec/tools/__init__.py b/benchexec/tools/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/benchexec/tools/acsar.py b/benchexec/tools/acsar.py new file mode 100644 index 000000000..bc3314017 --- /dev/null +++ b/benchexec/tools/acsar.py @@ -0,0 +1,77 @@ +import os +import subprocess + +import benchexec.util as util +import benchexec.tools.template +import benchexec.result as result + +class Tool(benchexec.tools.template.BaseTool): + + def executable(self): + return util.find_executable('acsar') + + + def name(self): + return 'Acsar' + + + def cmdline(self, executable, options, sourcefiles, propertyfile, rlimits): + assert len(sourcefiles) == 1, "only one sourcefile supported" + sourcefile = sourcefiles[0] + + # create tmp-files for acsar, acsar needs special error-labels + self.prepSourcefile = self._prepareSourcefile(sourcefile) + + return [executable] + ["--file"] + [self.prepSourcefile] + options + + + def _prepareSourcefile(self, sourcefile): + content = open(sourcefile, "r").read() + content = content.replace( + "ERROR;", "ERROR_LOCATION;").replace( + "ERROR:", "ERROR_LOCATION:").replace( + "errorFn();", "goto ERROR_LOCATION; ERROR_LOCATION:;") + newFilename = sourcefile + "_acsar.c" + util.write_file(newFilename, content) + return newFilename + + + def determine_result(self, returncode, returnsignal, output, isTimeout): + output = '\n'.join(output) + if "syntax error" in output: + status = "SYNTAX ERROR" + + elif "runtime error" in output: + status = "RUNTIME ERROR" + + elif "error while loading shared libraries:" in output: + status = "LIBRARY ERROR" + + elif "can not be used as a root procedure because it is not defined" in output: + status = "NO MAIN" + + elif "For Error Location <>: I don't Know " in output: + status = "TIMEOUT" + + elif "received signal 6" in output: + status = "ABORT" + + elif "received signal 11" in output: + status = "SEGFAULT" + + elif "received signal 15" in output: + status = "KILLED" + + elif "Error Location <> is not reachable" in output: + status = result.STATUS_TRUE_PROP + + elif "Error Location <> is reachable via the following path" in output: + status = result.STATUS_FALSE_REACH + + else: + status = result.STATUS_UNKNOWN + + # delete tmp-files + os.remove(self.prepSourcefile) + + return status diff --git a/benchexec/tools/blast.py b/benchexec/tools/blast.py new file mode 100644 index 000000000..709b7afbd --- /dev/null +++ b/benchexec/tools/blast.py @@ -0,0 +1,60 @@ +import subprocess +import os + +import benchexec.util as util +import benchexec.tools.template +import benchexec.result as result + +class Tool(benchexec.tools.template.BaseTool): + + def executable(self): + return util.find_executable('pblast.opt') + + + def program_files(self, executable): + executableDir = os.path.dirname(executable) + return [executableDir] + + + def working_directory(self, executable): + return os.curdir + + + def environment(self, executable): + executableDir = os.path.dirname(executable) + workingDir = self.working_directory(executable) + return {"additionalEnv" : {'PATH' : ':' + (os.path.relpath(executableDir, start=workingDir))}} + + + def version(self, executable): + return subprocess.Popen([executable], + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT).communicate()[0][6:11] + + + def cmdline(self, blastExe, options, sourcefiles, propertyfile, rlimits): + 
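+        # BLAST is started via the 'ocamltune' helper; both executables are
+        # referenced relative to the working directory used for the run.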
workingDir = self.working_directory(blastExe)
+        ocamlExe = util.find_executable('ocamltune')
+        return [os.path.relpath(ocamlExe, start=workingDir), os.path.relpath(blastExe, start=workingDir)] + options + sourcefiles
+
+
+    def name(self):
+        return 'BLAST'
+
+
+    def determine_result(self, returncode, returnsignal, output, isTimeout):
+        status = result.STATUS_UNKNOWN
+        for line in output:
+            if line.startswith('Error found! The system is unsafe :-('):
+                status = result.STATUS_FALSE_REACH
+            elif line.startswith('No error found. The system is safe :-)'):
+                status = result.STATUS_TRUE_PROP
+            elif line.startswith('Fatal error: exception Out_of_memory'):
+                status = 'OUT OF MEMORY'
+            elif line.startswith('Error: label \'ERROR\' appears multiple times'):
+                status = 'ERROR'
+            elif (returnsignal == 9):
+                status = 'TIMEOUT'
+            elif 'Ack! The gremlins again!' in line:
+                status = 'EXCEPTION (Gremlins)'
+        return status
diff --git a/benchexec/tools/cbmc.py b/benchexec/tools/cbmc.py
new file mode 100644
index 000000000..5947b51f9
--- /dev/null
+++ b/benchexec/tools/cbmc.py
@@ -0,0 +1,113 @@
+import logging
+import os
+import platform
+import subprocess
+import xml.etree.ElementTree as ET
+
+import benchexec.util as util
+import benchexec.tools.template
+import benchexec.result as result
+
+class Tool(benchexec.tools.template.BaseTool):
+    """
+    Tool wrapper for CBMC (http://www.cprover.org/cbmc/).
+    It always adds --xml-ui to the command-line arguments for easier parsing of the output.
+    """
+
+    def executable(self):
+        fallback = "lib/native/x86_64-linux/cbmc" if platform.machine() == "x86_64" else \
+                   "lib/native/x86-linux/cbmc" if platform.machine() == "i386" else None
+        return util.find_executable('cbmc', fallback)
+
+
+    def version(self, executable):
+        return subprocess.Popen([executable, '--version'],
+                                stdout=subprocess.PIPE).communicate()[0].strip()
+
+
+    def name(self):
+        return 'CBMC'
+
+
+    def cmdline(self, executable, options, sourcefiles, propertyfile, rlimits):
+        if ("--xml-ui" not in options):
+            options = options + ["--xml-ui"]
+
+        self.options = options
+
+        return [executable] + options + sourcefiles
+
+
+    def determine_result(self, returncode, returnsignal, output, isTimeout):
+        output = '\n'.join(output)
+        #an empty tag cannot be parsed into a tree
+        output = output.replace("<>", "<emptyTag>")
+        output = output.replace("</>", "</emptyTag>")
+
+        if returnsignal == 0 and ((returncode == 0) or (returncode == 10)):
+            try:
+                tree = ET.fromstring(output)
+                status = tree.findtext('cprover-status')
+
+                if status is None:
+                    def isErrorMessage(msg):
+                        return msg.get('type', None) == 'ERROR'
+
+                    messages = list(filter(isErrorMessage, tree.getiterator('message')))
+                    if messages:
+                        # for now, use only the first error message if there are several
+                        msg = messages[0].findtext('text')
+                        if msg == 'Out of memory':
+                            status = 'OUT OF MEMORY'
+                        elif msg:
+                            status = 'ERROR ({0})'.format(msg)
+                        else:
+                            status = 'ERROR'
+                    else:
+                        status = 'INVALID OUTPUT'
+
+                elif status == "FAILURE":
+                    assert returncode == 10
+                    reason = tree.find('goto_trace').find('failure').findtext('reason')
+                    if 'unwinding assertion' in reason:
+                        status = result.STATUS_UNKNOWN
+                    else:
+                        status = result.STATUS_FALSE_REACH
+
+                elif status == "SUCCESS":
+                    assert returncode == 0
+                    if "--no-unwinding-assertions" in self.options:
+                        status = result.STATUS_UNKNOWN
+                    else:
+                        status = result.STATUS_TRUE_PROP
+
+            except Exception as e: # catch all exceptions
+                if isTimeout:
+                    # in this case an exception is expected as the XML is invalid
+                    status = 'TIMEOUT'
+                elif
'Minisat::OutOfMemoryException' in output: + status = 'OUT OF MEMORY' + else: + status = 'INVALID OUTPUT' + logging.warning("Error parsing CBMC output for returncode %d: %s" % (returncode, e)) + + elif returncode == 6: + # parser error or something similar + status = 'ERROR' + + elif returnsignal == 9: + if isTimeout: + status = 'TIMEOUT' + else: + status = "KILLED BY SIGNAL 9" + + elif returnsignal == 6: + status = "ABORTED" + elif returnsignal == 15 or returncode == 15: + status = "KILLED" + elif returncode == 64 and 'Usage error!' in output: + status = 'INVALID ARGUMENTS' + else: + status = "ERROR ({0})".format(returncode) + + return status diff --git a/benchexec/tools/cpachecker.py b/benchexec/tools/cpachecker.py new file mode 100644 index 000000000..232c57cfe --- /dev/null +++ b/benchexec/tools/cpachecker.py @@ -0,0 +1,241 @@ +#!/usr/bin/env python + +""" +CPAchecker is a tool for configurable software verification. +This file is part of CPAchecker. + +Copyright (C) 2007-2014 Dirk Beyer +All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + + +CPAchecker web page: + http://cpachecker.sosy-lab.org +""" + +# prepare for Python 3 +from __future__ import absolute_import, division, print_function, unicode_literals + +import logging +import subprocess +import sys +import string +import os +import re + +sys.dont_write_bytecode = True # prevent creation of .pyc files + +if __name__ == "__main__": + sys.path.append(os.path.join(os.path.dirname(__file__), os.path.pardir, os.path.pardir)) + +import benchexec.result as result +import benchexec.util as util +import benchexec.tools.template +from benchexec.model import SOFTTIMELIMIT + +REQUIRED_PATHS = [ + "lib/java/runtime", + "lib/*.jar", + "lib/native/x86_64-linux", + "scripts", + "cpachecker.jar", + "config", + ] + +class Tool(benchexec.tools.template.BaseTool): + """ + Tool wrapper for CPAchecker. + It has additional features such as building CPAchecker before running it + if executed within a source checkout. + It also supports extracting data from the statistics output of CPAchecker + for adding it to the result tables. 
+ """ + + def executable(self): + executable = util.find_executable('cpa.sh', 'scripts/cpa.sh') + executableDir = os.path.join(os.path.dirname(executable), os.path.pardir) + if os.path.isdir(os.path.join(executableDir, 'src')): + self._buildCPAchecker(executableDir) + if not os.path.isfile(os.path.join(executableDir, "cpachecker.jar")): + logging.warning("Required JAR file for CPAchecker not found in {0}.".format(executableDir)) + return executable + + + def _buildCPAchecker(self, executableDir): + logging.debug('Building CPAchecker in directory {0}.'.format(executableDir)) + ant = subprocess.Popen(['ant', '-lib', 'lib/java/build', '-q', 'jar'], cwd=executableDir, shell=util.is_windows()) + (stdout, stderr) = ant.communicate() + if ant.returncode: + sys.exit('Failed to build CPAchecker, please fix the build first.') + + + def program_files(self, executable): + executableDir = os.path.join(os.path.dirname(executable), os.path.pardir) + return util.flatten(util.expand_filename_pattern(path, executableDir) for path in REQUIRED_PATHS) + + + def working_directory(self, executable): + return os.curdir + + + def version(self, executable): + try: + process = subprocess.Popen([executable, '-help'], stdout=subprocess.PIPE, stderr=subprocess.PIPE) + (stdout, stderr) = process.communicate() + except OSError as e: + logging.warning('Cannot run CPAchecker to determine version: {0}'.format(e.strerror)) + return '' + if stderr: + logging.warning('Cannot determine CPAchecker version, error output: {0}'.format(util.decode_to_string(stderr))) + return '' + if process.returncode: + logging.warning('Cannot determine CPAchecker version, exit code {0}'.format(process.returncode)) + return '' + stdout = util.decode_to_string(stdout) + line = next(l for l in stdout.splitlines() if l.startswith('CPAchecker')) + line = line.replace('CPAchecker' , '') + line = line.split('(')[0] + return line.strip() + + def name(self): + return 'CPAchecker' + + + def cmdline(self, executable, options, sourcefiles, propertyfile=None, rlimits={}): + if SOFTTIMELIMIT in rlimits: + if "-timelimit" in options: + logging.warning('Time limit already specified in command-line options, not adding time limit from benchmark definition to the command line.') + else: + options = options + ["-timelimit", str(rlimits[SOFTTIMELIMIT]) + "s"] # benchmark-xml uses seconds as unit + + # if data.MEMLIMIT in rlimits: + # if "-heap" not in options: + # heapsize = rlimits[MEMLIMIT]*0.8 # 20% overhead for non-java-memory + # options = options + ["-heap", str(int(heapsize)) + "MiB"] # benchmark-xml uses MiB as unit + + if ("-stats" not in options): + options = options + ["-stats"] + + spec = ["-spec", propertyfile] if propertyfile is not None else [] + return [executable] + options + spec + sourcefiles + + + def determine_result(self, returncode, returnsignal, output, isTimeout): + """ + @param returncode: code returned by CPAchecker + @param returnsignal: signal, which terminated CPAchecker + @param output: the output of CPAchecker + @return: status of CPAchecker after executing a run + """ + + def isOutOfNativeMemory(line): + return ('std::bad_alloc' in line # C++ out of memory exception (MathSAT) + or 'Cannot allocate memory' in line + or 'Native memory allocation (malloc) failed to allocate' in line # JNI + or line.startswith('out of memory') # CuDD + ) + + if returnsignal == 0 and returncode > 128: + # shells sets return code to 128+signal when a signal is received + returnsignal = returncode - 128 + + if returnsignal != 0: + if returnsignal == 6: + 
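+                # Map the usual POSIX signal numbers to readable statuses:
+                # 6 = SIGABRT, 9 = SIGKILL, 11 = SIGSEGV, 15 = SIGTERM.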
status = 'ABORTED' + elif returnsignal == 9 and isTimeout: + status = 'TIMEOUT' + elif returnsignal == 11: + status = 'SEGMENTATION FAULT' + elif returnsignal == 15: + status = 'KILLED' + else: + status = 'KILLED BY SIGNAL '+str(returnsignal) + + elif returncode != 0: + status = 'ERROR ({0})'.format(returncode) + + else: + status = '' + + for line in output: + if 'java.lang.OutOfMemoryError' in line: + status = 'OUT OF JAVA MEMORY' + elif isOutOfNativeMemory(line): + status = 'OUT OF NATIVE MEMORY' + elif 'There is insufficient memory for the Java Runtime Environment to continue.' in line \ + or 'cannot allocate memory for thread-local data: ABORT' in line: + status = 'OUT OF MEMORY' + elif 'SIGSEGV' in line: + status = 'SEGMENTATION FAULT' + elif ((returncode == 0 or returncode == 1) + and ('Exception' in line or 'java.lang.AssertionError' in line) + and not line.startswith('cbmc')): # ignore "cbmc error output: ... Minisat::OutOfMemoryException" + status = 'ASSERTION' if 'java.lang.AssertionError' in line else 'EXCEPTION' + elif 'Could not reserve enough space for object heap' in line: + status = 'JAVA HEAP ERROR' + elif line.startswith('Error: ') and not status: + status = 'ERROR' + if 'Unsupported C feature (recursion)' in line: + status = 'ERROR (recursion)' + elif 'Unsupported C feature (threads)' in line: + status = 'ERROR (threads)' + elif 'Parsing failed' in line: + status = 'ERROR (parsing failed)' + elif line.startswith('For your information: CPAchecker is currently hanging at') and status == 'ERROR (1)' and isTimeout: + status = 'TIMEOUT' + + elif line.startswith('Verification result: '): + line = line[21:].strip() + if line.startswith('TRUE'): + newStatus = result.STATUS_TRUE_PROP + elif line.startswith('FALSE'): + newStatus = result.STATUS_FALSE_REACH + match = re.match('.* Property violation \(([^:]*)(:.*)?\) found by chosen configuration.*', line) + if match and match.group(1) in ['valid-deref', 'valid-free', 'valid-memtrack']: + newStatus = result.STR_FALSE + '(' + match.group(1) + ')' + else: + newStatus = result.STATUS_UNKNOWN + + if not status: + status = newStatus + elif newStatus != result.STATUS_UNKNOWN: + status = "{0} ({1})".format(status, newStatus) + + if not status: + status = result.STATUS_UNKNOWN + return status + + + def add_column_values(self, output, columns): + for column in columns: + + # search for the text in output and get its value, + # stop after the first line, that contains the searched text + column.value = "-" # default value + for line in output: + if column.text in line: + startPosition = line.find(':') + 1 + endPosition = line.find('(', startPosition) # bracket maybe not found -> (-1) + if (endPosition == -1): + column.value = line[startPosition:].strip() + else: + column.value = line[startPosition: endPosition].strip() + break + + +if __name__ == "__main__": + tool = Tool() + executable = tool.executable() + print('Executable: {0}'.format(os.path.abspath(executable))) + print('Version: {0}'.format(tool.version(executable))) diff --git a/benchexec/tools/cpalien.py b/benchexec/tools/cpalien.py new file mode 100644 index 000000000..0a1967591 --- /dev/null +++ b/benchexec/tools/cpalien.py @@ -0,0 +1,127 @@ +# prepare for Python 3 +from __future__ import absolute_import, division, print_function, unicode_literals + +import logging +import subprocess +import sys +import string +import os +import re +import benchexec.result as result + +sys.dont_write_bytecode = True # prevent creation of .pyc files + +if __name__ == "__main__": + 
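+    # When this module is executed directly, make the benchexec package
+    # importable by adding the repository root (two levels up) to sys.path.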
sys.path.append(os.path.join(os.path.dirname(__file__), os.path.pardir, os.path.pardir))
+
+import benchexec.util as util
+import benchexec.tools.cpachecker
+
+class Tool(benchexec.tools.cpachecker.Tool):
+
+    def determine_result(self, returncode, returnsignal, output, isTimeout):
+        """
+        @param returncode: code returned by CPAchecker
+        @param returnsignal: signal, which terminated CPAchecker
+        @param output: the output of CPAchecker
+        @return: status of CPAchecker after executing a run
+        """
+
+        def isOutOfNativeMemory(line):
+            return ('std::bad_alloc' in line # C++ out of memory exception (MathSAT)
+                 or 'Cannot allocate memory' in line
+                 or 'Native memory allocation (malloc) failed to allocate' in line # JNI
+                 or line.startswith('out of memory') # CuDD
+                 )
+
+        if returnsignal == 0 and returncode > 128:
+            # shells sets return code to 128+signal when a signal is received
+            returnsignal = returncode - 128
+
+        if returnsignal != 0:
+            if returnsignal == 6:
+                status = 'ABORTED'
+            elif returnsignal == 9 and isTimeout:
+                status = 'TIMEOUT'
+            elif returnsignal == 11:
+                status = 'SEGMENTATION FAULT'
+            elif returnsignal == 15:
+                status = 'KILLED'
+            else:
+                status = 'KILLED BY SIGNAL '+str(returnsignal)
+
+        elif returncode != 0:
+            status = 'ERROR ({0})'.format(returncode)
+
+        else:
+            status = ''
+
+        bad_free = False
+        memory_leak = False
+        bad_deref = False
+        undef = False
+        for line in output:
+            if 'java.lang.OutOfMemoryError' in line:
+                status = 'OUT OF JAVA MEMORY'
+            elif isOutOfNativeMemory(line):
+                status = 'OUT OF NATIVE MEMORY'
+            elif 'There is insufficient memory for the Java Runtime Environment to continue.' in line \
+                    or 'cannot allocate memory for thread-local data: ABORT' in line:
+                status = 'OUT OF MEMORY'
+            elif 'SIGSEGV' in line:
+                status = 'SEGMENTATION FAULT'
+            elif ((returncode == 0 or returncode == 1)
+                    and ('Exception' in line or 'java.lang.AssertionError' in line)
+                    and not line.startswith('cbmc')): # ignore "cbmc error output: ...
Minisat::OutOfMemoryException" + status = 'ASSERTION' if 'java.lang.AssertionError' in line else 'EXCEPTION' + elif 'Could not reserve enough space for object heap' in line: + status = 'JAVA HEAP ERROR' + elif line.startswith('Error: ') and not status.startswith('ERROR'): + status = 'ERROR' + if 'Unsupported C feature (recursion)' in line: + status = 'ERROR (recursion)' + elif 'Unsupported C feature (threads)' in line: + status = 'ERROR (threads)' + elif 'Parsing failed' in line: + status = 'ERROR (parsing failed)' + elif 'Unknown function' in line: + status = 'ERROR (unknown function)' + elif line.startswith("Invalid free found"): + bad_free = True + elif line.startswith("Memory leak found"): + memory_leak = True + elif line.startswith("Invalid read found"): + bad_deref = True + elif line.startswith("Invalid write found"): + bad_deref = True + elif line.startswith("Non-target undefined behavior detected."): + status = "ERROR (undefined behavior)" + undef = True; + + elif line.startswith('Verification result: '): + line = line[21:].strip() + if line.startswith('TRUE'): + newStatus = result.STATUS_TRUE_PROP + elif line.startswith('FALSE'): + newStatus = result.STATUS_FALSE_REACH + match = re.match('.* Property violation \(([^:]*)(:.*)?\) found by chosen configuration.*', line) + if match and match.group(1) in ['valid-deref', 'valid-free', 'valid-memtrack']: + newStatus = result.STR_FALSE + '(' + match.group(1) + ')' + + else: + newStatus = result.STATUS_UNKNOWN if not status.startswith('ERROR') else None + if newStatus and not status: + status = newStatus + + if status == 'KILLED (UNKNOWN)': + status = 'KILLED' + if not status or undef: + status = result.STATUS_UNKNOWN + return status + + +if __name__ == "__main__": + tool = Tool() + executable = tool.executable() + print('Executable: {0}'.format(os.path.abspath(executable))) + print('Version: {0}'.format(tool.version(executable))) diff --git a/benchexec/tools/ecav.py b/benchexec/tools/ecav.py new file mode 100644 index 000000000..67b4e2e1f --- /dev/null +++ b/benchexec/tools/ecav.py @@ -0,0 +1,28 @@ +import benchexec.util as util +import benchexec.tools.template +import benchexec.result as result + +class Tool(benchexec.tools.template.BaseTool): + + def executable(self): + return util.find_executable('ecaverifier') + + + def name(self): + return 'EcaVerifier' + + + def determine_result(self, returncode, returnsignal, output, isTimeout): + status = result.STATUS_UNKNOWN + for line in output: + if line.startswith('0 safe, 1 unsafe'): + status = result.STATUS_FALSE_REACH + elif line.startswith('1 safe, 0 unsafe'): + status = result.STATUS_TRUE_PROP + elif returnsignal == 9: + if isTimeout: + status = 'TIMEOUT' + else: + status = "KILLED BY SIGNAL 9" + + return status \ No newline at end of file diff --git a/benchexec/tools/esbmc.py b/benchexec/tools/esbmc.py new file mode 100644 index 000000000..d24b933d0 --- /dev/null +++ b/benchexec/tools/esbmc.py @@ -0,0 +1,111 @@ + +import subprocess +import os +import benchexec.util as util +import benchexec.tools.template +import benchexec.result as result + +class Tool(benchexec.tools.template.BaseTool): + """ + This class serves as tool adaptor for ESBMC (http://www.esbmc.org/) + """ + + def executable(self): + return util.find_executable('esbmc') + + + def program_files(self, executable): + executableDir = os.path.dirname(executable) + return [executableDir] + + + def working_directory(self, executable): + executableDir = os.path.dirname(executable) + return executableDir + + + def environment(self, 
executable): + return {"additionalEnv" : {'PATH' : ':.'}} + + + def version(self, executable): + return subprocess.Popen([executable, '--version'], + stdout=subprocess.PIPE).communicate()[0].strip() + + + def name(self): + return 'ESBMC' + + + def cmdline(self, executable, options, sourcefiles, propertyfile, rlimits): + assert len(sourcefiles) == 1, "only one sourcefile supported" + sourcefile = sourcefiles[0] + workingDir = self.working_directory(executable) + return [os.path.relpath(executable, start=workingDir)] + options + [os.path.relpath(sourcefile, start=workingDir)] + + + + def determine_result(self, returncode, returnsignal, output, isTimeout): + output = '\n'.join(output) + status = result.STATUS_UNKNOWN + + if self.allInText(['Violated property:', + 'dereference failure: dynamic object lower bound', + 'VERIFICATION FAILED'], + output): + status = result.STATUS_FALSE_DEREF + elif self.allInText(['Violated property:', + 'Operand of free must have zero pointer offset', + 'VERIFICATION FAILED'], + output): + status = result.STATUS_FALSE_FREE + elif self.allInText(['Violated property:', + 'error label', + 'VERIFICATION FAILED'], + output): + status = result.STATUS_FALSE_REACH + elif self.allInText(['Violated property:', + 'assertion', + 'VERIFICATION FAILED'], + output): + status = result.STATUS_FALSE_REACH + elif self.allInText(['Violated property:', + 'dereference failure: forgotten memory', + 'VERIFICATION FAILED'], + output): + status = result.STATUS_FALSE_MEMTRACK + elif 'VERIFICATION SUCCESSFUL' in output: + status = result.STATUS_TRUE_PROP + + if status == result.STATUS_UNKNOWN: + if isTimeout: + status = 'TIMEOUT' + elif output.endswith(('Z3 Error 9', 'Z3 Error 9\n')): + status = 'ERROR (Z3 Error 9)' + elif output.endswith(('error', 'error\n')): + status = 'ERROR' + elif 'Encountered Z3 conversion error:' in output: + status = 'ERROR (Z3 conversion error)' + + return status + + + def add_column_values(self, output, columns): + """ + This method adds the values that the user requested to the column objects. + If a value is not found, it should be set to '-'. + If not supported, this method does not need to get overridden. + """ + pass + + """ helper method """ + def allInText(self, words, text): + """ + This function checks, if all the words appear in the given order in the text. 
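+        For example, allInText(['a', 'c'], 'a b c') is True,
+        whereas allInText(['c', 'a'], 'a b c') is False.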
+        """
+        index = 0
+        for word in words:
+            index = text[index:].find(word)
+            if index == -1:
+                return False
+        return True
diff --git a/benchexec/tools/evolcheck.py b/benchexec/tools/evolcheck.py
new file mode 100644
index 000000000..5c7c56659
--- /dev/null
+++ b/benchexec/tools/evolcheck.py
@@ -0,0 +1,85 @@
+import logging
+import os
+import platform
+import tempfile
+import subprocess
+import hashlib
+import xml.etree.ElementTree as ET
+
+import benchexec.util as util
+import benchexec.tools.template
+import benchexec.result as result
+
+class Tool(benchexec.tools.template.BaseTool):
+
+    previousStatus = None
+
+    def executable(self):
+        return util.find_executable('evolcheck_wrapper')
+
+
+    def version(self, executable):
+        return subprocess.Popen([executable, '--version'],
+                                stdout=subprocess.PIPE).communicate()[0].strip()
+
+    def name(self):
+        return 'eVolCheck'
+
+    def preprocessSourcefile(self, sourcefile):
+        gotoCcExecutable = util.find_executable('goto-cc')
+        # compile with goto-cc to the same file, with '.cc' appended
+        self.preprocessedFile = sourcefile + ".cc"
+
+        subprocess.Popen([gotoCcExecutable,
+                          sourcefile,
+                          '-o',
+                          self.preprocessedFile],
+                         stdout=subprocess.PIPE).wait()
+
+        return self.preprocessedFile
+
+
+    def cmdline(self, executable, options, sourcefiles, propertyfile, rlimits):
+        assert len(sourcefiles) == 1, "only one sourcefile supported"
+        sourcefile = sourcefiles[0]
+        sourcefile = self.preprocessSourcefile(sourcefile)
+
+        # also append '.cc' to the predecessor-file
+        if '--predecessor' in options:
+            options[options.index('--predecessor') + 1] = options[options.index('--predecessor') + 1] + '.cc'
+
+        return [executable] + [sourcefile] + options
+
+    def determine_result(self, returncode, returnsignal, output, isTimeout):
+        if not os.path.isfile(self.preprocessedFile):
+            return 'ERROR (goto-cc)'
+
+        status = None
+
+        assertionHoldsFound = False
+        verificationSuccessfulFound = False
+        verificationFailedFound = False
+
+        for line in output:
+            if 'A real bug found.' in line:
+                status = result.STATUS_FALSE_REACH
+            elif 'VERIFICATION SUCCESSFUL' in line:
+                verificationSuccessfulFound = True
+            elif 'VERIFICATION FAILED' in line:
+                verificationFailedFound = True
+            elif 'ASSERTION(S) HOLD(S)' in line:
+                assertionHoldsFound = True
+            elif 'The program models are identical' in line:
+                status = self.previousStatus
+            elif 'Assertion(s) hold trivially.' in line:
+                status = result.STATUS_TRUE_PROP
+
+        if status is None:
+            if verificationSuccessfulFound and not verificationFailedFound:
+                status = result.STATUS_TRUE_PROP
+            else:
+                status = result.STATUS_UNKNOWN
+
+        self.previousStatus = status
+
+        return status
diff --git a/benchexec/tools/false.py b/benchexec/tools/false.py
new file mode 100644
index 000000000..6d9612951
--- /dev/null
+++ b/benchexec/tools/false.py
@@ -0,0 +1,21 @@
+import benchexec.tools.template
+import benchexec.result as result
+
+class Tool(benchexec.tools.template.BaseTool):
+    """
+    This tool is an imaginary tool that always returns UNSAFE.
+    To use it you need a normal benchmark-xml-file
+    with the tool and sourcefiles; options are ignored.
+    """
+
+    def executable(self):
+        return '/bin/false'
+
+    def name(self):
+        return 'AlwaysFalseReach'
+
+    def cmdline(self, executable, options, sourcefiles, propertyfile, rlimits):
+        return [executable] + sourcefiles
+
+    def determine_result(self, returncode, returnsignal, output, isTimeout):
+        return result.STATUS_FALSE_REACH
\ No newline at end of file
diff --git a/benchexec/tools/feaver.py b/benchexec/tools/feaver.py
new file mode 100644
index 000000000..d5c121b86
--- /dev/null
+++ b/benchexec/tools/feaver.py
@@ -0,0 +1,70 @@
+import os
+
+import benchexec.util as util
+import benchexec.tools.template
+import benchexec.result as result
+
+class Tool(benchexec.tools.template.BaseTool):
+
+    def executable(self):
+        return util.find_executable('feaver_cmd')
+
+
+    def name(self):
+        return 'Feaver'
+
+
+    def cmdline(self, executable, options, sourcefiles, propertyfile, rlimits):
+        assert len(sourcefiles) == 1, "only one sourcefile supported"
+        sourcefile = sourcefiles[0]
+
+        # create tmp-files for feaver, feaver needs special error-labels
+        self.prepSourcefile = self._prepareSourcefile(sourcefile)
+
+        return [executable] + ["--file"] + [self.prepSourcefile] + options
+
+
+    def _prepareSourcefile(self, sourcefile):
+        content = open(sourcefile, "r").read()
+        content = content.replace("goto ERROR;", "assert(0);")
+        newFilename = "tmp_benchmark_feaver.c"
+        util.write_file(newFilename, content)
+        return newFilename
+
+
+    def determine_result(self, returncode, returnsignal, output, isTimeout):
+        output = '\n'.join(output)
+        if "collect2: ld returned 1 exit status" in output:
+            status = "COMPILE ERROR"
+
+        elif "Error (parse error" in output:
+            status = "PARSE ERROR"
+
+        elif "error: (\"model\":" in output:
+            status = "MODEL ERROR"
+
+        elif "Error: syntax error" in output:
+            status = "SYNTAX ERROR"
+
+        elif "error: " in output or "Error: " in output:
+            status = "ERROR"
+
+        elif "Error Found:" in output:
+            status = result.STATUS_FALSE_REACH
+
+        elif "No Errors Found" in output:
+            status = result.STATUS_TRUE_PROP
+
+        else:
+            status = result.STATUS_UNKNOWN
+
+        # delete tmp-files
+        for tmpfile in [self.prepSourcefile, self.prepSourcefile[0:-1] + "M",
+                        "_modex_main.spn", "_modex_.h", "_modex_.cln", "_modex_.drv",
+                        "model", "pan.b", "pan.c", "pan.h", "pan.m", "pan.t"]:
+            try:
+                os.remove(tmpfile)
+            except OSError:
+                pass
+
+        return status
diff --git a/benchexec/tools/llbmc.py b/benchexec/tools/llbmc.py
new file mode 100644
index 000000000..f3c8a9b67
--- /dev/null
+++ b/benchexec/tools/llbmc.py
@@ -0,0 +1,79 @@
+import os
+import subprocess
+
+import benchexec.util as util
+import benchexec.tools.template
+import benchexec.result as result
+
+class Tool(benchexec.tools.template.BaseTool):
+    """
+    This class serves as tool adaptor for LLBMC
+    """
+
+    def executable(self):
+        return util.find_executable('lib/native/x86_64-linux/llbmc')
+
+
+    def version(self, executable):
+        return subprocess.Popen([executable, '--version'],
+                                stdout=subprocess.PIPE).communicate()[0].splitlines()[2][8:18]
+
+
+    def name(self):
+        return 'LLBMC'
+
+
+    def cmdline(self, executable, options, sourcefiles, propertyfile, rlimits):
+        assert len(sourcefiles) == 1, "only one sourcefile supported"
+        sourcefile = sourcefiles[0]
+        # compile sourcefile with clang
+        self.prepSourcefile = self._prepareSourcefile(sourcefile)
+
+        return [executable] + options + [self.prepSourcefile]
+
+
+    def _prepareSourcefile(self, sourcefile):
+        clangExecutable = util.find_executable('clang')
+        newFilename = sourcefile + ".o"
+
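+        # clang compiles the C file to LLVM bitcode (-c -emit-llvm) as 32-bit
+        # gnu89 C without optimization (-O0), suppressing all warnings (-w);
+        # LLBMC then analyzes the resulting bitcode file.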
+        subprocess.Popen([clangExecutable,
+                          '-c',
+                          '-emit-llvm',
+                          '-std=gnu89',
+                          '-m32',
+                          sourcefile,
+                          '-O0',
+                          '-o',
+                          newFilename,
+                          '-w'],
+                         stdout=subprocess.PIPE).wait()
+
+        return newFilename
+
+
+    def determine_result(self, returncode, returnsignal, output, isTimeout):
+        status = result.STATUS_UNKNOWN
+
+        for line in output:
+            if 'Error detected.' in line:
+                status = result.STATUS_FALSE_REACH
+            elif 'No error detected.' in line:
+                status = result.STATUS_TRUE_PROP
+
+        # delete tmp-files
+        try:
+            os.remove(self.prepSourcefile)
+        except OSError:
+            print("Could not remove file " + self.prepSourcefile + "! Maybe clang call failed")
+
+        return status
+
+
+    def add_column_values(self, output, columns):
+        """
+        This method adds the values that the user requested to the column objects.
+        If a value is not found, it should be set to '-'.
+        If not supported, this method does not need to get overridden.
+        """
+        pass
diff --git a/benchexec/tools/rand.py b/benchexec/tools/rand.py
new file mode 100644
index 000000000..9bc78b4a5
--- /dev/null
+++ b/benchexec/tools/rand.py
@@ -0,0 +1,22 @@
+from random import random
+import benchexec.tools.template
+import benchexec.result as result
+
+class Tool(benchexec.tools.template.BaseTool):
+    """
+    This tool is an imaginary tool that randomly returns SAFE and UNSAFE.
+    To use it you need a normal benchmark-xml-file
+    with the tool and sourcefiles; options are ignored.
+    """
+
+    def executable(self):
+        return '/bin/true'
+
+    def name(self):
+        return 'Random'
+
+    def cmdline(self, executable, options, sourcefiles, propertyfile, rlimits):
+        return [executable] + sourcefiles
+
+    def determine_result(self, returncode, returnsignal, output, isTimeout):
+        return result.STATUS_TRUE_PROP if random() < 0.5 else result.STATUS_FALSE_REACH
\ No newline at end of file
diff --git a/benchexec/tools/satabs.py b/benchexec/tools/satabs.py
new file mode 100644
index 000000000..372251a9b
--- /dev/null
+++ b/benchexec/tools/satabs.py
@@ -0,0 +1,41 @@
+import subprocess
+
+import benchexec.util as util
+import benchexec.tools.template
+import benchexec.result as result
+
+class Tool(benchexec.tools.template.BaseTool):
+
+    def executable(self):
+        return util.find_executable('satabs')
+
+
+    def version(self, executable):
+        return subprocess.Popen([executable, '--version'],
+                                stdout=subprocess.PIPE).communicate()[0].strip()
+
+
+    def name(self):
+        return 'SatAbs'
+
+
+    def determine_result(self, returncode, returnsignal, output, isTimeout):
+        output = '\n'.join(output)
+        if "VERIFICATION SUCCESSFUL" in output:
+            assert returncode == 0
+            status = result.STATUS_TRUE_PROP
+        elif "VERIFICATION FAILED" in output:
+            assert returncode == 10
+            status = result.STATUS_FALSE_REACH
+        elif returnsignal == 9:
+            status = "TIMEOUT"
+        elif returnsignal == 6:
+            if "Assertion `!counterexample.steps.empty()' failed" in output:
+                status = 'COUNTEREXAMPLE FAILED' # TODO: other status?
+            else:
+                status = "OUT OF MEMORY"
+        elif returncode == 1 and "PARSING ERROR" in output:
+            status = "PARSING ERROR"
+        else:
+            status = "FAILURE"
+        return status
diff --git a/benchexec/tools/template.py b/benchexec/tools/template.py
new file mode 100644
index 000000000..7efdd24ca
--- /dev/null
+++ b/benchexec/tools/template.py
@@ -0,0 +1,108 @@
+import benchexec.util as util
+
+class BaseTool(object):
+    """
+    This class serves both as a template for tool adaptor implementations,
+    and as an abstract super class for them.
+    For writing a new tool adaptor, inherit from this class and override
+    the necessary methods (usually only executable(), name(), and determine_result(),
+    maybe version() and cmdline(), too).
+    The classes for each specific tool need to be named "Tool"
+    and be located in a module named "benchexec.tools.<toolname>",
+    where "<toolname>" is the string specified by the user in the benchmark definition.
+    """
+
+    def executable(self):
+        """
+        Find the path to the executable file that will get executed.
+        This method always needs to be overridden,
+        and most implementations will look similar to this one.
+        """
+        return util.find_executable('tool')
+
+
+    def version(self, executable):
+        """
+        Determine a version string for this tool, if available.
+        """
+        return ''
+
+
+    def name(self):
+        """
+        Return the name of the tool, formatted for humans.
+        """
+        return 'UNKNOWN'
+
+
+    def cmdline(self, executable, options, sourcefiles, propertyfile=None, rlimits={}):
+        """
+        Compose the command line to execute from the name of the executable,
+        the user-specified options, and the sourcefile to analyze.
+        This method can be overridden if, for example, some options should
+        be enabled or if the order of arguments must be changed.
+
+        @param executable: the path to the executable of the tool, the result of executable()
+        @param options: a list of options, in the same order as given in the XML-file.
+        @param sourcefiles: a list of sourcefiles that should be analysed with the tool in one run.
+            In most cases we have only _one_ sourcefile.
+        @param propertyfile: contains a specification for the verifier.
+        @param rlimits: This dictionary contains resource-limits for a run,
+            for example: time-limit, soft-time-limit, hard-time-limit, memory-limit, cpu-core-limit.
+            All entries in rlimits are optional, so check for existence before usage!
+        """
+        return [executable] + options + sourcefiles
+
+
+    def determine_result(self, returncode, returnsignal, output, isTimeout):
+        """
+        Parse the output of the tool and extract the verification result.
+        This method always needs to be overridden.
+        """
+        return 'UNKNOWN'
+
+
+    def add_column_values(self, output, columns):
+        """
+        OPTIONAL, override this to add statistics data from the output of the tool
+        to the tables if requested by the user.
+        If a value is not found, it should be set to '-'.
+        """
+        pass
+
+
+    def program_files(self, executable):
+        """
+        OPTIONAL, this method is only necessary for situations when the benchmark environment
+        needs to know all files belonging to a tool
+        (to transport them to a cloud service, for example).
+        Returns a list of files or directories that are necessary to run the tool.
+        """
+        return []
+
+
+    def working_directory(self, executable):
+        """
+        OPTIONAL, this method is only necessary for situations
+        when the tool needs a separate working directory.
+        """
+        return "."
+
+
+    def environment(self, executable):
+        """
+        OPTIONAL, this method is only necessary for tools
+        that need special environment variables.
+        Returns a map that contains identifiers for several submaps.
+        All keys and values have to be Strings!
+
+        Currently we support 2 identifiers:
+
+        "newEnv": Before the execution, the values are assigned to the real environment-identifiers.
+                  This will override existing values.
+        "additionalEnv": Before the execution, the values are appended to the real environment-identifiers.
+                  The separator for the appending must be given in this method,
+                  so that the operation "realValue + additionalValue" is a valid value.
+                  For example, in the PATH variable the additionalValue starts with a ":".
+        """
+        return {}
diff --git a/benchexec/tools/threader.py b/benchexec/tools/threader.py
new file mode 100644
index 000000000..4755c52fa
--- /dev/null
+++ b/benchexec/tools/threader.py
@@ -0,0 +1,60 @@
+
+import subprocess
+import os
+import benchexec.util as util
+import benchexec.tools.template
+import benchexec.result as result
+
+class Tool(benchexec.tools.template.BaseTool):
+    """
+    This class serves as tool adaptor for Threader (http://www.model.in.tum.de/~popeea/research/threader.html)
+    """
+
+    def executable(self):
+        return util.find_executable('threader.sh')
+
+
+    def program_files(self, executable):
+        executableDir = os.path.dirname(executable)
+        return [executableDir]
+
+
+    def working_directory(self, executable):
+        executableDir = os.path.dirname(executable)
+        return executableDir
+
+
+    def environment(self, executable):
+        return {"additionalEnv" : {'PATH' : ':.'}}
+
+
+    def version(self, executable):
+        exe = 'cream'
+        return subprocess.Popen([exe, '--help'], stdout=subprocess.PIPE)\
+            .communicate()[0].splitlines()[2][34:42]
+
+
+    def name(self):
+        return 'Threader'
+
+
+    def cmdline(self, executable, options, sourcefiles, propertyfile, rlimits):
+        assert len(sourcefiles) == 1, "only one sourcefile supported"
+        sourcefile = sourcefiles[0]
+        workingDir = self.working_directory(executable)
+        return [os.path.relpath(executable, start=workingDir)] + options + [os.path.relpath(sourcefile, start=workingDir)]
+
+
+    def determine_result(self, returncode, returnsignal, output, isTimeout):
+        output = '\n'.join(output)
+        if 'SSSAFE' in output:
+            status = result.STATUS_TRUE_PROP
+        elif 'UNSAFE' in output:
+            status = result.STATUS_FALSE_REACH
+        else:
+            status = result.STATUS_UNKNOWN
+
+        if status == result.STATUS_UNKNOWN and isTimeout:
+            status = 'TIMEOUT'
+
+        return status
diff --git a/benchexec/tools/true.py b/benchexec/tools/true.py
new file mode 100644
index 000000000..a9e86c822
--- /dev/null
+++ b/benchexec/tools/true.py
@@ -0,0 +1,20 @@
+import benchexec.tools.template
+import benchexec.result as result
+
+class Tool(benchexec.tools.template.BaseTool):
+    """
+    This tool is an imaginary tool that always returns SAFE.
+    To use it you need a normal benchmark-xml-file
+    with the tool and sourcefiles; options are ignored.
+ """ + def executable(self): + return '/bin/true' + + def name(self): + return 'AlwaysTrue' + + def cmdline(self, executable, options, sourcefiles, propertyfile, rlimits): + return [executable] + sourcefiles + + def determine_result(self, returncode, returnsignal, output, isTimeout): + return result.STATUS_TRUE_PROP \ No newline at end of file diff --git a/benchexec/tools/ufo.py b/benchexec/tools/ufo.py new file mode 100644 index 000000000..76fd86eca --- /dev/null +++ b/benchexec/tools/ufo.py @@ -0,0 +1,31 @@ +import benchexec.util as util +import benchexec.tools.template + +class Tool(benchexec.tools.template.BaseTool): + + def executable(self): + return util.find_executable('ufo.sh') + + + def name(self): + return 'Ufo' + + + def determine_result(self, returncode, returnsignal, output, isTimeout): + output = '\n'.join(output) + if returnsignal == 9 or returnsignal == (128+9): + if isTimeout: + status = "TIMEOUT" + else: + status = "KILLED BY SIGNAL 9" + elif returncode == 1 and "program correct: ERROR unreachable" in output: + status = "SAFE" + elif returncode != 0: + status = "ERROR ({0})".format(returncode) + elif "ERROR reachable" in output: + status = "UNSAFE" + elif "program correct: ERROR unreachable" in output: + status = "SAFE" + else: + status = "FAILURE" + return status \ No newline at end of file diff --git a/benchexec/tools/wolverine.py b/benchexec/tools/wolverine.py new file mode 100644 index 000000000..5b9736ec2 --- /dev/null +++ b/benchexec/tools/wolverine.py @@ -0,0 +1,38 @@ +import subprocess + +import benchexec.util as util +import benchexec.tools.template +import benchexec.result as result + +class Tool(benchexec.tools.template.BaseTool): + + def executable(self): + return util.find_executable('wolverine') + + + def version(self, executable): + return subprocess.Popen([executable, '--version'], + stdout=subprocess.PIPE).communicate()[0].split()[1].strip() + + + def name(self): + return 'Wolverine' + + + def determine_result(self, returncode, returnsignal, output, isTimeout): + output = '\n'.join(output) + if "VERIFICATION SUCCESSFUL" in output: + assert returncode == 0 + status = result.STATUS_TRUE_PROP + elif "VERIFICATION FAILED" in output: + assert returncode == 10 + status = result.STATUS_FALSE_REACH + elif returnsignal == 9: + status = "TIMEOUT" + elif returnsignal == 6 or (returncode == 6 and "Out of memory" in output): + status = "OUT OF MEMORY" + elif returncode == 6 and "PARSING ERROR" in output: + status = "PARSING ERROR" + else: + status = "FAILURE" + return status \ No newline at end of file diff --git a/benchexec/util.py b/benchexec/util.py new file mode 100644 index 000000000..3f2e76780 --- /dev/null +++ b/benchexec/util.py @@ -0,0 +1,348 @@ +""" +CPAchecker is a tool for configurable software verification. +This file is part of CPAchecker. + +Copyright (C) 2007-2014 Dirk Beyer +All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+
+
+CPAchecker web page:
+  http://cpachecker.sosy-lab.org
+"""
+
+# prepare for Python 3
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+import errno
+import glob
+import logging
+import os
+import signal
+import subprocess
+import sys
+import xml.etree.ElementTree as ET
+
+"""
+This module contains some useful functions for strings, XML, and lists.
+"""
+
+ENERGY_TYPES = ['cpu', 'core', 'uncore', 'external']
+
+def is_windows():
+    return os.name == 'nt'
+
+def force_linux_path(path):
+    if is_windows():
+        return path.replace('\\', '/')
+    return path
+
+def kill_process(pid, sig=signal.SIGKILL):
+    '''
+    This function kills the process and the children in its process group.
+    '''
+    try:
+        os.kill(pid, sig)
+    except OSError as e:
+        if e.errno == errno.ESRCH: # process itself returned and exited before killing
+            logging.debug("Failure {0} while killing process {1} with signal {2}: {3}".format(e.errno, pid, sig, e.strerror))
+        else:
+            logging.warning("Failure {0} while killing process {1} with signal {2}: {3}".format(e.errno, pid, sig, e.strerror))
+
+def printOut(value, end='\n'):
+    """
+    This function prints the given string immediately and flushes the output.
+    """
+    sys.stdout.write(value)
+    sys.stdout.write(end)
+    sys.stdout.flush()
+
+def is_code(filename):
+    """
+    This function returns True if a line of the file contains the bracket '{'.
+    """
+    with open(filename, "r") as file:
+        for line in file:
+            # ignore comments and empty lines
+            if not is_comment(line) \
+                    and '{' in line: # <-- simple indicator for code
+                if '${' not in line: # <-- ${abc} variable to substitute
+                    return True
+    return False
+
+def is_comment(line):
+    return not line or line.startswith("#") or line.startswith("//")
+
+
+def remove_all(list, elemToRemove):
+    return [elem for elem in list if elem != elemToRemove]
+
+
+def flatten(iterable, exclude=[]):
+    return [value for sublist in iterable for value in sublist if not value in exclude]
+
+
+def get_list_from_xml(elem, tag="option", attributes=["name"]):
+    '''
+    This function searches for all "option"-tags and returns a list with all attributes and texts.
+    '''
+    return flatten(([option.get(attr) for attr in attributes] + [option.text] for option in elem.findall(tag)), exclude=[None])
+
+
+def copy_of_xml_element(elem):
+    """
+    This method returns a shallow copy of an XML element.
+    This method is for compatibility with Python 2.6 or earlier.
+    In Python 2.7 you can use 'copyElem = elem.copy()' instead.
+    """
+
+    copyElem = ET.Element(elem.tag, elem.attrib)
+    for child in elem:
+        copyElem.append(child)
+    return copyElem
+
+
+def xml_to_string(elem):
+    """
+    Return a pretty-printed XML string for the Element.
+    """
+    from xml.dom import minidom
+    rough_string = ET.tostring(elem, 'utf-8')
+    reparsed = minidom.parseString(rough_string)
+    return reparsed.toprettyxml(indent="  ")
+
+
+def decode_to_string(toDecode):
+    """
+    This function is needed for Python 3,
+    because a subprocess can return bytes instead of a string.
+    """
+    try:
+        return toDecode.decode('utf-8')
+    except AttributeError: # toDecode was of type string before
+        return toDecode
+
+
+def format_number(number, number_of_digits):
+    """
+    The function format_number() returns a string representation of a number
+    with a number of digits after the decimal separator.
+    If the number has more digits, it is rounded.
+    If the number has fewer digits, zeros are added.
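+    For example, format_number(3.14159, 2) yields "3.14"
+    and format_number(3.1, 3) yields "3.100".
+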
+def format_number(number, number_of_digits):
+    """
+    The function format_number() returns a string representation of a number
+    with a fixed number of digits after the decimal separator.
+    If the number has more digits, it is rounded;
+    if it has fewer, zeros are added.
+
+    @param number: the number to format
+    @param number_of_digits: the number of digits after the decimal separator
+    """
+    if number is None:
+        return ""
+    return "%.{0}f".format(number_of_digits) % number
+
+
+def parse_int_list(s):
+    """
+    Parse a comma-separated string of integers into a list.
+    The string may additionally contain ranges such as "1-5",
+    which will be expanded into "1,2,3,4,5".
+    """
+    result = []
+    for item in s.split(','):
+        item = item.strip().split('-')
+        if len(item) == 1:
+            result.append(int(item[0]))
+        elif len(item) == 2:
+            start, end = item
+            result.extend(range(int(start), int(end)+1))
+        else:
+            raise ValueError("invalid range: '{0}'".format(s))
+    return result
+
+
+def expand_filename_pattern(pattern, base_dir):
+    """
+    Expand a file name pattern containing wildcards, environment variables etc.
+
+    @param pattern: The pattern string to expand.
+    @param base_dir: The directory that relative paths are based on.
+    @return: A list of file names (possibly empty).
+    """
+    # 'join' ignores base_dir if pattern is absolute.
+    # 'normpath' replaces 'A/foo/../B' with 'A/B', for pretty printing only
+    pattern = os.path.normpath(os.path.join(base_dir, pattern))
+
+    # expand tilde and variables
+    pattern = os.path.expandvars(os.path.expanduser(pattern))
+
+    # expand wildcards
+    fileList = glob.glob(pattern)
+
+    return fileList
+
+
+def get_files(paths):
+    changed = False
+    result = []
+    for path in paths:
+        if os.path.isfile(path):
+            result.append(path)
+        elif os.path.isdir(path):
+            changed = True
+            for currentPath, dirs, files in os.walk(path):
+                # ignore hidden files and directories (on Linux they start with '.');
+                # 'dirs' must be modified in place because os.walk uses it for recursion
+                files = [f for f in files if not f.startswith('.')]
+                dirs[:] = [d for d in dirs if not d.startswith('.')]
+                result.extend(os.path.join(currentPath, f) for f in files)
+    return result if changed else paths
+
+
+def find_executable(program, fallback=None, exitOnError=True):
+    def is_executable(programPath):
+        return os.path.isfile(programPath) and os.access(programPath, os.X_OK)
+
+    dirs = os.environ['PATH'].split(os.path.pathsep)
+    dirs.append(os.path.curdir)
+
+    for dir in dirs:
+        name = os.path.join(dir, program)
+        if is_executable(name):
+            return name
+
+    if fallback is not None and is_executable(fallback):
+        return fallback
+
+    if exitOnError:
+        sys.exit("ERROR: Could not find '{0}' executable".format(program))
+    else:
+        return fallback
+
+
+def common_base_dir(l):
+    # os.path.commonprefix returns the longest common string prefix,
+    # which is not necessarily a common directory
+    return os.path.dirname(os.path.commonprefix(l))
+
+
+def write_file(content, *path):
+    """
+    Simply write some content to a file, overwriting the file if it exists.
+    """
+    with open(os.path.join(*path), "w") as file:
+        return file.write(content)
+
+
+def read_file(*path):
+    """
+    Read the full content of a file.
+    """
+    with open(os.path.join(*path)) as f:
+        return f.read().strip()
+
+
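A quick illustration of the number and list helpers above (input values invented; again assumes the benchexec package is importable):

    import benchexec.util as util

    print(util.parse_int_list("0,2-4, 7"))   # [0, 2, 3, 4, 7]
    print(util.format_number(3.14159, 2))    # 3.14
    print(util.format_number(2, 3))          # 2.000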
+def add_files_to_git_repository(base_dir, files, description):
+    """
+    Add and commit all files given in a list into a git repository in the
+    base_dir directory. Nothing is done if the git repository has
+    local changes.
+
+    @param files: the files to commit
+    @param description: the commit message
+    """
+    if not os.path.isdir(base_dir):
+        printOut('Output path is not a directory, cannot add files to git repository.')
+        return
+
+    # find out root directory of repository
+    gitRoot = subprocess.Popen(['git', 'rev-parse', '--show-toplevel'],
+                               cwd=base_dir,
+                               stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    stdout = gitRoot.communicate()[0]
+    if gitRoot.returncode != 0:
+        printOut('Cannot commit results to repository: git rev-parse failed, perhaps output path is not a git directory?')
+        return
+    gitRootDir = decode_to_string(stdout).splitlines()[0]
+
+    # check whether repository is clean
+    gitStatus = subprocess.Popen(['git', 'status', '--porcelain', '--untracked-files=no'],
+                                 cwd=gitRootDir,
+                                 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    (stdout, stderr) = gitStatus.communicate()
+    if gitStatus.returncode != 0:
+        printOut('Git status failed! Output was:\n' + decode_to_string(stderr))
+        return
+
+    if stdout:
+        printOut('Git repository has local changes, not committing results.')
+        return
+
+    # add files to staging area
+    files = [os.path.realpath(file) for file in files]
+    gitAdd = subprocess.Popen(['git', 'add', '--'] + files,
+                              cwd=gitRootDir)
+    if gitAdd.wait() != 0:
+        printOut('Git add failed, will not commit results!')
+        return
+
+    # commit files
+    printOut('Committing results files to git repository in ' + gitRootDir)
+    gitCommit = subprocess.Popen(['git', 'commit', '--file=-', '--quiet'],
+                                 cwd=gitRootDir,
+                                 stdin=subprocess.PIPE)
+    gitCommit.communicate(description.encode('UTF-8'))
+    if gitCommit.returncode != 0:
+        printOut('Git commit failed!')
+        return
+
+
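A usage sketch for the energy helpers that follow (sample values invented; measure_energy() returns an empty dict when the read-energy.sh helper is not on the PATH):

    import benchexec.util as util

    before = util.measure_energy()      # snapshot, e.g. {'cpu': 100420, 'core': 40210}
    # ... execute the benchmarked run here ...
    used = util.measure_energy(before)  # per-type difference, e.g. {'cpu': 512, 'core': 203}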
+def measure_energy(oldEnergy=None):
+    '''
+    Returns a dictionary with the currently available energy counters
+    (a snapshot, like a time stamp).
+    If oldEnergy is not None, the difference (currentValue - oldEnergy) is returned instead.
+    '''
+    newEnergy = {}
+
+    executable = find_executable('read-energy.sh', exitOnError=False)
+    if executable is None: # not available on current system
+        logging.debug('Energy measurement not available because read-energy.sh could not be found.')
+        return newEnergy
+
+    for energyType in ENERGY_TYPES:
+        logging.debug('Reading {0} energy measurement.'.format(energyType))
+        energysh = subprocess.Popen([executable, energyType], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        (stdout, stderr) = energysh.communicate()
+        if energysh.returncode or stderr:
+            logging.debug('Error while reading {0} energy measurement: retval={3}, out={1}, err={2}'.format(energyType, stdout, stderr, energysh.returncode))
+        try:
+            newEnergy[energyType] = int(stdout)
+        except ValueError:
+            logging.debug('Invalid value while reading {0} energy measurement: {1}'.format(energyType, stdout))
+
+    logging.debug('Finished reading energy measurements.')
+
+    if oldEnergy is None:
+        return newEnergy
+    else:
+        return _energy_difference(newEnergy, oldEnergy)
+
+
+def _energy_difference(newEnergy, oldEnergy):
+    '''
+    Returns a dict with (newEnergy - oldEnergy) for each type (=key) of energy,
+    but only if both values exist.
+    '''
+    diff = {}
+    for key in newEnergy:
+        if key in oldEnergy:
+            diff[key] = newEnergy[key] - oldEnergy[key]
+    return diff
diff --git a/benchmark-result.dtd b/benchmark-result.dtd
new file mode 100644
index 000000000..450bab19d
--- /dev/null
+++ b/benchmark-result.dtd
@@ -0,0 +1,38 @@
+<!-- 38 lines of element and attribute declarations for the result XML
+     format; the markup was lost during text extraction -->
diff --git a/benchmark.dtd b/benchmark.dtd
new file mode 100644
index 000000000..31ce4bb84
--- /dev/null
+++ b/benchmark.dtd
@@ -0,0 +1,37 @@
+<!-- 37 lines of element and attribute declarations for the benchmark-definition
+     XML format; the markup was lost during text extraction -->
diff --git a/runexecutor.py b/runexecutor.py
new file mode 100755
index 000000000..17f97e3b0
--- /dev/null
+++ b/runexecutor.py
@@ -0,0 +1,34 @@
+#!/usr/bin/env python
+"""
+CPAchecker is a tool for configurable software verification.
+This file is part of CPAchecker.
+
+Copyright (C) 2007-2014 Dirk Beyer
+All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+
+CPAchecker web page:
+  http://cpachecker.sosy-lab.org
+"""
+
+# prepare for Python 3
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+import sys
+sys.dont_write_bytecode = True # prevent creation of .pyc files
+
+import benchexec.runexecutor
+
+sys.exit(benchexec.runexecutor.main())
diff --git a/table-generator.dtd b/table-generator.dtd
new file mode 100644
index 000000000..a73325363
--- /dev/null
+++ b/table-generator.dtd
@@ -0,0 +1,4 @@
+<!-- 4 lines of element and attribute declarations for the table-definition
+     XML format; the markup was lost during text extraction -->
\ No newline at end of file
diff --git a/table-generator.py b/table-generator.py
new file mode 100755
index 000000000..2fac89dee
--- /dev/null
+++ b/table-generator.py
@@ -0,0 +1,38 @@
+#!/usr/bin/env python
+
+"""
+CPAchecker is a tool for configurable software verification.
+This file is part of CPAchecker.
+
+Copyright (C) 2007-2014 Dirk Beyer
+All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+
+CPAchecker web page:
+  http://cpachecker.sosy-lab.org
+"""
+
+# prepare for Python 3
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+import sys
+sys.dont_write_bytecode = True # prevent creation of .pyc files
+
+import benchexec.tablegenerator
+
+try:
+    sys.exit(benchexec.tablegenerator.main())
+except KeyboardInterrupt:
+    print('Interrupted')
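The wrapper scripts in this patch all share the same structure. As a sketch, a wrapper for a further, hypothetical module benchexec.mymodule (the module name is invented; it is assumed to expose a main() returning an exit code, like runexecutor and tablegenerator above) would look like this:

    #!/usr/bin/env python
    # prepare for Python 3
    from __future__ import absolute_import, division, print_function, unicode_literals

    import sys
    sys.dont_write_bytecode = True  # prevent creation of .pyc files

    import benchexec.mymodule  # hypothetical module with a main() entry point

    sys.exit(benchexec.mymodule.main())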