From ce8925e2e809c3e54d053af3addd230743a07317 Mon Sep 17 00:00:00 2001
From: xin liang
Date: Sun, 23 Jul 2023 23:02:46 +0800
Subject: [PATCH 01/14] Dev: log: Add funcName for DEBUG2 log level

---
 crmsh/log.py | 24 +++++++++++++++++++++---
 1 file changed, 21 insertions(+), 3 deletions(-)

diff --git a/crmsh/log.py b/crmsh/log.py
index 278a738353..689048076f 100644
--- a/crmsh/log.py
+++ b/crmsh/log.py
@@ -64,6 +64,9 @@ def format(self, record):
         msg = record.msg
         record.msg = "{}: {}".format(self.lineno, msg)
         record.levelname = levelname
+        if record.levelname == "DEBUG2":
+            msg = record.msg
+            record.msg = f"{record.funcName}: {msg}"
         return super().format(record)
@@ -93,9 +96,21 @@ class DebugCustomFilter(logging.Filter):
     A custom filter for debug message
     """
     def filter(self, record):
-        from .config import core, report
+        from .config import core
         if record.levelname == "DEBUG":
-            return core.debug or int(report.verbosity) >= 1
+            return core.debug
+        else:
+            return True
+
+
+class ReportDebugCustomFilter(logging.Filter):
+    """
+    A custom filter for crm report debug message
+    """
+    def filter(self, record):
+        from .config import report
+        if record.levelname == "DEBUG":
+            return int(report.verbosity) >= 1
         if record.levelname == "DEBUG2":
             return int(report.verbosity) > 1
         else:
             return True
@@ -138,6 +153,9 @@ def _open(self):
         "filter": {
             "()": DebugCustomFilter
         },
+        "filter_report": {
+            "()": ReportDebugCustomFilter
+        },
     },
     "handlers": {
         'null': {
@@ -146,7 +164,7 @@ def _open(self):
         "console_report": {
             "()": ConsoleCustomHandler,
             "formatter": "console_report",
-            "filters": ["filter"]
+            "filters": ["filter_report"]
         },
         "console": {
             "()": ConsoleCustomHandler,
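A minimal, self-contained sketch of the filtering scheme this patch introduces — a custom "DEBUG2" level gated by a verbosity counter, and a formatter that prefixes DEBUG2 records with the emitting function. The names below are illustrative only, not the crmsh API:

    import logging

    DEBUG2 = logging.DEBUG - 1
    logging.addLevelName(DEBUG2, "DEBUG2")

    class FuncNameFormatter(logging.Formatter):
        def format(self, record):
            # Prefix DEBUG2 messages with the emitting function, as the patch does
            if record.levelname == "DEBUG2":
                record.msg = f"{record.funcName}: {record.msg}"
            return super().format(record)

    class VerbosityFilter(logging.Filter):
        def __init__(self, verbosity):
            super().__init__()
            self.verbosity = verbosity
        def filter(self, record):
            # -v enables DEBUG, -vv (and above) enables DEBUG2
            if record.levelname == "DEBUG":
                return self.verbosity >= 1
            if record.levelname == "DEBUG2":
                return self.verbosity > 1
            return True

    logger = logging.getLogger("demo")
    handler = logging.StreamHandler()
    handler.setFormatter(FuncNameFormatter("%(levelname)s: %(message)s"))
    handler.addFilter(VerbosityFilter(verbosity=2))
    logger.addHandler(handler)
    logger.setLevel(DEBUG2)
    logger.log(DEBUG2, "collector started")  # prints "DEBUG2: <module>: collector started"
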
From a26504c457af09233de36aedc79c323e8e5b7ff8 Mon Sep 17 00:00:00 2001
From: xin liang
Date: Tue, 15 Aug 2023 09:15:31 +0800
Subject: [PATCH 02/14] Dev: config: Exclude pacemaker.log from
 collect_extra_logs as it is already collected by crm report itself

---
 crmsh/config.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/crmsh/config.py b/crmsh/config.py
index 54fdbaea15..0530ef0efe 100644
--- a/crmsh/config.py
+++ b/crmsh/config.py
@@ -295,8 +295,8 @@ def get(self, value):
         'from_time': opt_string('-12H'),
         'compress': opt_boolean('yes'),
         'speed_up': opt_boolean('no'),
-        'collect_extra_logs': opt_string('/var/log/messages /var/log/pacemaker/pacemaker.log \
-            /var/log/pacemaker.log /var/log/crmsh/crmsh.log /etc/crm/profiles.yml /etc/crm/crm.conf'),
+        'collect_extra_logs': opt_string('/var/log/messages \
+            /var/log/crmsh/crmsh.log /etc/crm/profiles.yml /etc/crm/crm.conf'),
         'remove_exist_dest': opt_boolean('no'),
         'single_node': opt_boolean('no'),
         'sanitize_rule': opt_string('passw.*'),
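For context on how this default is consumed: the rewritten report code (patch 04 below) copies each option from the [report] section onto its Context object, splitting collect_extra_logs into a list. A rough stand-in for that consumption, using configparser instead of crmsh's own config machinery:

    from configparser import ConfigParser

    # Hypothetical stand-in for crmsh's config loading: the report section of
    # /etc/crm/crm.conf provides string defaults that the report Context splits up.
    parser = ConfigParser()
    parser.read_string("""
    [report]
    from_time = -12H
    compress = yes
    collect_extra_logs = /var/log/messages /var/log/crmsh/crmsh.log
    """)

    extra_log_list = parser.get("report", "collect_extra_logs").split()
    # pacemaker.log is no longer listed here; the log collector finds it itself
    assert "/var/log/pacemaker.log" not in extra_log_list
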
From c46cadec5f2e29f80d6cbb4f0fd9467da697432e Mon Sep 17 00:00:00 2001
From: xin liang
Date: Mon, 16 Oct 2023 14:48:49 +0800
Subject: [PATCH 03/14] Dev: utils: Remove unused code

---
 crmsh/bootstrap.py |   9 ----
 crmsh/corosync.py  |  11 -----
 crmsh/utils.py     | 113 ---------------------------------------------
 3 files changed, 133 deletions(-)

diff --git a/crmsh/bootstrap.py b/crmsh/bootstrap.py
index 6f5a834af6..05a1bea478 100644
--- a/crmsh/bootstrap.py
+++ b/crmsh/bootstrap.py
@@ -228,8 +228,6 @@ def _validate_nodes_option(self):
         me = utils.this_node()
         was_localhost_already = False
         li = [utils.parse_user_at_host(x) for x in self.user_at_node_list]
-        if utils.has_dup_value([node for user, node in li]):
-            utils.fatal("Duplicated host in -N/--nodes options.")
         for user in (user for user, node in li if node == me and user is not None and user != self.current_user):
             utils.fatal(f"Overriding current user '{self.current_user}' by '{user}'. Ouch, don't do it.")
         self.user_at_node_list = [value for (user, node), value in zip(li, self.user_at_node_list) if node != me]
@@ -794,13 +792,6 @@ def start_pacemaker(node_list=[], enable_flag=False):
     return service_manager.start_service("pacemaker.service", enable=enable_flag, node_list=node_list)
 
 
-def install_tmp(tmpfile, to):
-    with open(tmpfile, "r") as src:
-        with utils.open_atomic(to, "w") as dst:
-            for line in src:
-                dst.write(line)
-
-
 def append(fromfile, tofile, remote=None):
     cmd = "cat {} >> {}".format(fromfile, tofile)
     sh.cluster_shell().get_stdout_or_raise_error(cmd, host=remote)

diff --git a/crmsh/corosync.py b/crmsh/corosync.py
index 1149f035df..71d38b29ac 100644
--- a/crmsh/corosync.py
+++ b/crmsh/corosync.py
@@ -35,10 +35,6 @@ def cfgtool(*args):
     return ShellUtils().get_stdout(['corosync-cfgtool'] + list(args), shell=False)
 
 
-def quorumtool(*args):
-    return ShellUtils().get_stdout(['corosync-quorumtool'] + list(args), shell=False)
-
-
 def query_status(status_type):
     """
     Query status of corosync
@@ -369,13 +365,6 @@ def joiner(tstream):
     return ''.join(joiner(self._tokens))
 
 
-def logfile(conftext):
-    '''
-    Return corosync logfile (if set)
-    '''
-    return Parser(conftext).get('logging.logfile')
-
-
 def push_configuration(nodes):
     '''
     Push the local configuration to the list of remote nodes

diff --git a/crmsh/utils.py b/crmsh/utils.py
index 03812c85f3..1ff671289b 100644
--- a/crmsh/utils.py
+++ b/crmsh/utils.py
@@ -719,17 +719,6 @@ def file2str(fname, noerr=True):
     return s.strip()
 
 
-def file2list(fname):
-    '''
-    Read a file into a list (newlines dropped).
-    '''
-    try:
-        return open(fname).read().split('\n')
-    except IOError as msg:
-        logger.error(msg)
-        return None
-
-
 def safe_open_w(fname):
     if fname == "-":
         f = sys.stdout
@@ -1163,13 +1152,6 @@ def shortdate(ts):
     return time.strftime("%F", time.localtime(0))
 
 
-def sort_by_mtime(l):
-    'Sort a (small) list of files by time mod.'
-    l2 = [(os.stat(x).st_mtime, x) for x in l]
-    l2.sort()
-    return [x[1] for x in l2]
-
-
 def file_find_by_name(root, filename):
     'Find a file within a tree matching fname'
     assert root
@@ -1668,20 +1650,6 @@ def is_larger_than_pcmk_118(cib_f=None):
     return is_min_pcmk_ver("1.1.8", cib_f=cib_f)
 
 
-@memoize
-def cibadmin_features():
-    '''
-    # usage example:
-    if 'corosync-plugin' in cibadmin_features()
-    '''
-    rc, outp = ShellUtils().get_stdout(['cibadmin', '-!'], shell=False)
-    if rc == 0:
-        m = re.match(r'Pacemaker\s(\S+)\s\(Build: ([^\)]+)\):\s(.*)', outp.strip())
-        if m and len(m.groups()) > 2:
-            return m.group(3).split()
-    return []
-
-
 # quote function from python module shlex.py in python 3.3
 _find_unsafe = re.compile(r'[^\w@%+=:,./-]').search
 
@@ -1748,22 +1716,6 @@ def fetch_lifetime_opt(args, iso8601=True):
     return None
 
 
-def resolve_hostnames(hostnames):
-    '''
-    Tries to resolve the given list of hostnames.
-    returns (ok, failed-hostname)
-    ok: True if all hostnames resolved
-    failed-hostname: First failed hostname resolution
-    '''
-    import socket
-    for node in hostnames:
-        try:
-            socket.gethostbyname(node)
-        except socket.error:
-            return False, node
-    return True, None
-
-
 def list_corosync_node_names():
     '''
     Returns list of nodes configured
@@ -2167,29 +2119,6 @@ def debug_timestamp():
     return datetime.datetime.now().strftime('%Y/%m/%d %H:%M:%S')
 
 
-def get_member_iplist():
-    rc, out, err= ShellUtils().get_stdout_stderr("corosync-cmapctl -b runtime.totem.pg.mrp.srp.members")
-    if rc != 0:
-        logger.debug(err)
-        return None
-
-    ip_list = []
-    for line in out.split('\n'):
-        match = re.search(r'ip\((.*?)\)', line)
-        if match:
-            ip_list.append(match.group(1))
-    return ip_list
-
-
-def get_iplist_corosync_using():
-    """
-    Get ip list used by corosync
-    """
-    rc, out, err = ShellUtils().get_stdout_stderr("corosync-cfgtool -s")
-    if rc != 0:
-        raise ValueError(err)
-    return re.findall(r'id\s*=\s*(.*)', out)
-
-
 def check_ssh_passwd_need(local_user, remote_user, host):
     """
     Check whether access to host need password
@@ -2333,19 +2262,6 @@ def is_ipv6(cls, addr):
         """
         return cls(addr).version == 6
 
-    @classmethod
-    def is_valid_ip(cls, addr):
-        """
-        Check whether the address is valid IP address
-        """
-        cls_inst = cls(addr)
-        try:
-            cls_inst.ip_address
-        except ValueError:
-            return False
-        else:
-            return True
-
     @property
     def is_loopback(self):
         """
         Check whether the address is loopback address
         """
         return self.ip_address.is_loopback
 
-    @property
-    def is_link_local(self):
-        """
-        Check whether the address is link-local address
-        """
-        return self.ip_address.is_link_local
-
 
 class Interface(IP):
     """
@@ -2394,12 +2303,6 @@ def network(self):
         """
         return str(self.ip_interface.network.network_address)
 
-    def ip_in_network(self, addr):
-        """
-        Check whether the addr in the network
-        """
-        return IP(addr).ip_address in self.ip_interface.network
-
 
 class InterfacesInfo(object):
     """
@@ -2551,18 +2454,6 @@ def get_default_ip_list(self):
                 break
         return _ip_list
 
-    @classmethod
-    def ip_in_network(cls, addr):
-        """
-        Check whether given address was in one of local networks
-        """
-        cls_inst = cls(IP.is_ipv6(addr))
-        cls_inst.get_interfaces_info()
-        for interface_inst in cls_inst.interface_list:
-            if interface_inst.ip_in_network(addr):
-                return True
-        return False
-
 
 def check_file_content_included(source_file, target_file, remote=None, source_local=False):
     """
@@ -3046,10 +2937,6 @@ def read_from_file(infile):
     return to_ascii(data)
 
 
-def has_dup_value(_list):
-    return _list and len(_list) != len(set(_list))
-
-
 def detect_file(_file, remote=None):
     """
     Detect if file exists, support both local and remote
From 5f784ef44f935558fe71fb6b0267b86b0cf4e88e Mon Sep 17 00:00:00 2001
From: xin liang
Date: Sun, 23 Jul 2023 23:03:20 +0800
Subject: [PATCH 04/14] Dev: report: Rewrite crm report module

---
 crmsh/history.py          |    6 +-
 crmsh/report/collect.py   |  599 +++++++-----
 crmsh/report/constants.py |  158 ++--
 crmsh/report/core.py      |  808 ++++++++---------
 crmsh/report/utillib.py   | 1602 -------------------------------
 crmsh/report/utils.py     |  762 ++++++++++++++++++
 crmsh/sh.py               |    5 +-
 crmsh/utils.py            |    3 +
 scripts/health/collect.py |    4 +-
 9 files changed, 1669 insertions(+), 2278 deletions(-)
 delete mode 100644 crmsh/report/utillib.py
 create mode 100644 crmsh/report/utils.py

diff --git a/crmsh/history.py b/crmsh/history.py
index 069ab8994f..94a3b73503 100644
--- a/crmsh/history.py
+++ b/crmsh/history.py
@@ -15,7 +15,7 @@
 from . import utils
 from . import log
 from .sh import ShellUtils
-from crmsh.report import utillib
+from crmsh.report import core
 
 logger = log.setup_logger(__name__)
@@ -107,7 +107,7 @@ def mkarchive(idir):
     if not home:
         logger.error("no home directory, nowhere to pack report")
         return False
-    _, ext = utillib.pick_first_compress()
+    _, ext = core.pick_first_compress()
     if not ext:
         return False
     name = os.path.join(home, os.path.basename(idir))
@@ -469,7 +469,7 @@ def new_live_report(self):
         if not utils.is_path_sane(d):
             return None
         utils.rmdir_r(d)
-        _, ext = utillib.pick_first_compress()
+        _, ext = core.pick_first_compress()
         if not ext:
             return None
         tarball = f"{d}.tar{ext}"

diff --git a/crmsh/report/collect.py b/crmsh/report/collect.py
index 7476be0928..10cd048b04 100644
--- a/crmsh/report/collect.py
+++ b/crmsh/report/collect.py
@@ -9,49 +9,183 @@ import stat
 import pwd
 import datetime
+from subprocess import TimeoutExpired
+from typing import List
 
 import crmsh.user_of_host
-from crmsh import log, sh
+from crmsh import log, sh, corosync
 from crmsh import utils as crmutils
-from crmsh.report import constants, utillib
+from crmsh.report import constants, utils, core
 from crmsh.sh import ShellUtils
+from crmsh.service_manager import ServiceManager
 
 logger = log.setup_report_logger(__name__)
 
 
-def collect_ocfs2_info():
-    ocfs2_f = os.path.join(constants.WORKDIR, constants.OCFS2_F)
-    with open(ocfs2_f, "w") as f:
-        rc, out, err = ShellUtils().get_stdout_stderr("mounted.ocfs2 -d")
-        if rc != 0:
-            f.write("Failed to run \"mounted.ocfs2 -d\": {}".format(err))
-            return
-        # No ocfs2 device, just header line printed
-        elif len(out.split('\n')) == 1:
-            f.write("No ocfs2 partitions found")
-            return
-
-        f.write(utillib.dump_D_process())
-        f.write(utillib.lsof_ocfs2_device())
-
-        cmds = [ "dmesg", "ps -efL",
-                "lsblk -o 'NAME,KNAME,MAJ:MIN,FSTYPE,LABEL,RO,RM,MODEL,SIZE,OWNER,GROUP,MODE,ALIGNMENT,MIN-IO,OPT-IO,PHY-SEC,LOG-SEC,ROTA,SCHED,MOUNTPOINT'",
-                "mounted.ocfs2 -f", "findmnt", "mount",
-                "cat /sys/fs/ocfs2/cluster_stack"
-                ]
-        for cmd in cmds:
-            cmd_name = cmd.split()[0]
-            if not utillib.which(cmd_name) or \
-               cmd_name == "cat" and not os.path.exists(cmd.split()[1]):
-                continue
-            _, out = ShellUtils().get_stdout(cmd)
-            f.write("\n\n#=====[ Command ] ==========================#\n")
-            f.write("# %s\n"%(cmd))
-            f.write(out)
-
-
-def collect_ratraces():
+def get_corosync_log() -> str:
+    """
+    Get the path of the corosync log file
+    """
+    corosync_log = ""
+    corosync_conf_path = corosync.conf()
+    if os.path.exists(corosync_conf_path):
+        corosync_log = corosync.get_value("logging.logfile")
+    else:
+        logger.warning(f"File {corosync_conf_path} does not exist")
+    return corosync_log
+
+
+def get_pcmk_log() -> str:
+    """
+    Get the path of the pacemaker log file
+    """
+    pcmk_log_candidates = [
+        "/var/log/pacemaker/pacemaker.log",
+        "/var/log/pacemaker.log"
+    ]
+
+    if os.path.isfile(constants.PCMKCONF):
+        data = utils.read_from_file(constants.PCMKCONF)
+        if data:
+            res = re.search(r'^ *PCMK_logfile *= *(.*)', data, re.M)
+            if res:
+                pcmk_log_candidates.insert(0, res.group(1))
+
+    for log in pcmk_log_candidates:
+        if os.path.isfile(log):
+            return log
+
+    logger.warning("No valid pacemaker log file found")
+    return ""
+
+
+def collect_ha_logs(context: core.Context) -> None:
+    """
+    Collect pacemaker, corosync and extra logs
+    """
+    log_list = [get_pcmk_log(), get_corosync_log()] + context.extra_log_list
+    for log in log_list:
+        if os.path.isfile(log):
+            utils.dump_logset(context, log)
+
+
+def collect_journal_logs(context: core.Context) -> None:
+    """
+    Collect journal logs from a specific time range
+    """
+    from_time_str = utils.ts_to_str(context.from_time)
+    to_time_str = utils.ts_to_str(context.to_time)
+    logger.debug2(f"Collect journal logs since: {from_time_str} until: {to_time_str}")
+
+    journal_target_dict = {
+        "default": constants.JOURNAL_F,
+        "pacemaker": constants.JOURNAL_PCMK_F,
+        "corosync": constants.JOURNAL_COROSYNC_F,
+        "sbd": constants.JOURNAL_SBD_F
+    }
+    for item, outf in journal_target_dict.items():
+        journalctl_unit = "" if item == "default" else f" -u {item}"
+        cmd = f'journalctl{journalctl_unit} -o short-iso-precise --since "{from_time_str}" --until "{to_time_str}" --no-pager | tail -n +2'
+        output = utils.get_cmd_output(cmd)
+        logger.debug2(f"Running command: {cmd}")
+        _file = os.path.join(context.work_dir, outf)
+        crmutils.str2file(output, _file)
+        logger.debug(f"Dump journal log for {item} into {utils.real_path(_file)}")
+
+
+def dump_D_process() -> str:
+    """
+    Dump D-state process stack
+    """
+    out_string = ""
+
+    sh_utils_inst = ShellUtils()
+    _, out, _ = sh_utils_inst.get_stdout_stderr("ps aux|awk '$8 ~ /^D/{print $2}'")
+    len_D_process = len(out.split('\n')) if out else 0
+    out_string += f"Dump D-state process stack: {len_D_process}\n"
+    if len_D_process == 0:
+        return out_string
+
+    for pid in out.split('\n'):
+        _, cmd_out, _ = sh_utils_inst.get_stdout_stderr(f"cat /proc/{pid}/comm")
+        out_string += f"pid: {pid} comm: {cmd_out}\n"
+        _, stack_out, _ = sh_utils_inst.get_stdout_stderr(f"cat /proc/{pid}/stack")
+        out_string += stack_out + "\n\n"
+
+    return out_string
+
+
+def lsof_ocfs2_device() -> str:
+    """
+    List open files for OCFS2 device
+    """
+    out_string = ""
+
+    sh_utils_inst = ShellUtils()
+    _, out, _ = sh_utils_inst.get_stdout_stderr("mount")
+    dev_list = re.findall("^(.*) on .* type ocfs2 ", out, re.MULTILINE)
+    for dev in dev_list:
+        cmd = f"lsof {dev}"
+        out_string += "\n\n#=====[ Command ] ==========================#\n"
+        out_string += f"# {cmd}\n"
+        _, cmd_out, _ = sh_utils_inst.get_stdout_stderr(cmd)
+        if cmd_out:
+            out_string += cmd_out
+
+    return out_string
+
+
+def ocfs2_commands_output() -> str:
+    """
+    Run ocfs2 related commands, return outputs
+    """
+    out_string = ""
+
+    cmds = [
+        "dmesg",
+        "ps -efL",
+        "lsblk -o 'NAME,KNAME,MAJ:MIN,FSTYPE,LABEL,RO,RM,MODEL,SIZE,OWNER,GROUP,MODE,ALIGNMENT,MIN-IO,OPT-IO,PHY-SEC,LOG-SEC,ROTA,SCHED,MOUNTPOINT'",
+        "mounted.ocfs2 -f",
+        "findmnt",
+        "mount",
+        "cat /sys/fs/ocfs2/cluster_stack"
+    ]
+    for cmd in cmds:
+        cmd_name = cmd.split()[0]
+        if not shutil.which(cmd_name):
+            continue
+        if cmd_name == "cat" and not os.path.exists(cmd.split()[1]):
+            continue
+        out_string += "\n\n#===== [ Command ] ==========================#\n"
+        out_string += f"# {cmd}\n"
+        out_string += utils.get_cmd_output(cmd)
+
+    return out_string
+
+
+def collect_ocfs2_info(context: core.Context) -> None:
+    """
+    Collects OCFS2 information
+    """
+    out_string = ""
+    rc, out, err = ShellUtils().get_stdout_stderr("mounted.ocfs2 -d")
+    if rc != 0:
+        out_string += f"Failed to run \"mounted.ocfs2 -d\": {err}"
+    # No ocfs2 device, just header line printed
+    elif len(out.split('\n')) == 1:
+        out_string += "No ocfs2 partitions found"
+    else:
+        out_string += dump_D_process()
+        out_string += lsof_ocfs2_device()
+        out_string += ocfs2_commands_output()
+
+    ocfs2_f = os.path.join(context.work_dir, constants.OCFS2_F)
+    logger.debug(f"Dump OCFS2 information into {utils.real_path(ocfs2_f)}")
+    crmutils.str2file(out_string, ocfs2_f)
+
+
+def collect_ratraces(context: core.Context) -> None:
     """
     Collect ra trace file from default /var/lib/heartbeat/trace_ra and custom one
     """
@@ -66,217 +200,300 @@ def collect_ratraces():
     else:
         shell = sh.cluster_shell()
     log_contents = ""
-    cmd = "grep 'INFO: Trace for .* is written to ' {}*|grep -v 'collect'".format(log.CRMSH_LOG_FILE)
-    for node in crmutils.list_cluster_nodes():
+    cmd = f"grep 'INFO: Trace for .* is written to ' {log.CRMSH_LOG_FILE}*|grep -v 'collect'"
+    for node in context.node_list:
         log_contents += shell.get_rc_stdout_stderr_without_input(node, cmd)[1] + "\n"
     trace_dir_str = ' '.join(list(set(re.findall("written to (.*)/.*", log_contents))))
     if not trace_dir_str:
         return
-    logger.debug("Looking for RA trace files in \"%s\"", trace_dir_str)
-    for f in utillib.find_files(trace_dir_str, constants.FROM_TIME, constants.TO_TIME):
-        dest_dir = os.path.join(constants.WORKDIR, '/'.join(f.split('/')[-3:-1]))
+    logger.debug2("Looking for RA trace files in \"%s\"", trace_dir_str)
+    for f in utils.find_files_in_timespan(context, trace_dir_str.split()):
+        dest_dir = os.path.join(context.work_dir, '/'.join(f.split('/')[-3:-1]))
         crmutils.mkdirp(dest_dir)
         shutil.copy2(f, dest_dir)
+        logger.debug(f"Dump RA trace files into {utils.real_path(dest_dir)}")
 
 
-def collect_corosync_blackbox():
+def collect_corosync_blackbox(context: core.Context) -> None:
     fdata_list = []
-    for f in utillib.find_files("/var/lib/corosync", constants.FROM_TIME, constants.TO_TIME):
+    for f in utils.find_files_in_timespan(context, ["/var/lib/corosync"]):
         if re.search("fdata", f):
             fdata_list.append(f)
     if fdata_list:
-        blackbox_f = os.path.join(constants.WORKDIR, constants.COROSYNC_RECORDER_F)
-        crmutils.str2file(utillib.get_command_info("corosync-blackbox")[1], blackbox_f)
+        blackbox_f = os.path.join(context.work_dir, constants.COROSYNC_RECORDER_F)
+        out_string = utils.get_cmd_output("corosync-blackbox")
+        crmutils.str2file(out_string, blackbox_f)
+        logger.debug(f"Dump corosync blackbox info into {utils.real_path(blackbox_f)}")
+
+
+def collect_dlm_info(context: core.Context) -> None:
+    """
+    Get DLM information
+    """
+    if shutil.which("dlm_tool"):
+        name_list = []
+        out_string = "##### NOTICE - Lockspace overview:\n"
+        out_string += utils.get_cmd_output("dlm_tool ls")
+        name_list = re.findall(r"^name\s*(.*)$", out_string, re.MULTILINE)
 
-def collect_time_status():
-    out_string = "Time: "
-    out_string += datetime.datetime.now().strftime('%c') + '\n'
-    out_string += "ntpdc: "
-    out_string += utillib.get_command_info("ntpdc -pn")[1] + '\n'
+        for name in name_list:
+            out_string += f"\n\n## NOTICE - Lockspace {name}\n"
+            lockdebug_cmd = f"dlm_tool lockdebug {name}"
+            out_string += utils.get_cmd_output(lockdebug_cmd)
 
-    time_f = os.path.join(constants.WORKDIR, constants.TIME_F)
-    crmutils.str2file(out_string, time_f)
+        out_string += "\n\n##### NOTICE - Lockspace history:\n"
+        out_string += utils.get_cmd_output("dlm_tool dump")
 
-def collect_dlm_info():
+        dlm_f = os.path.join(context.work_dir, constants.DLM_DUMP_F)
+        crmutils.str2file(out_string, dlm_f)
+        logger.debug(f"Dump DLM information into {utils.real_path(dlm_f)}")
+
+
+def collect_perms_state(context: core.Context) -> None:
     """
-    get dlm info
+    Check and collect permissions and ownership information for specific directories
     """
-    if utillib.which("dlm_tool"):
-        out_string = "##### NOTICE - Lockspace overview:\n"
-        out_string += utillib.get_command_info("dlm_tool ls")[1] + '\n'
-        for item in utillib.grep("^name", incmd="dlm_tool ls"):
-            lock_name = item.split()[1]
-            out_string += "## NOTICE - Lockspace {}\n".format(lock_name)
{}\n".format(lock_name) - out_string += utillib.get_command_info("dlm_tool lockdebug {}".format(lock_name))[1] + '\n' - out_string += "##### NOTICE - Lockspace history:\n" - out_string += utillib.get_command_info("dlm_tool dump")[1] + '\n' - - dlm_f = os.path.join(constants.WORKDIR, constants.DLM_DUMP_F) - crmutils.str2file(out_string, dlm_f) + results = [] + for check_dir in [context.pcmk_lib_dir, context.pe_dir, context.cib_dir]: + if not os.path.isdir(check_dir): + result = f"{check_dir} is not a directory or does not exist" + else: + stat_info = os.stat(check_dir) + pwd_inst = pwd.getpwnam('hacluster') + expected_uid = pwd_inst.pw_uid + expected_gid = pwd_inst.pw_gid + expected_mode = 0o750 -def collect_perms_state(): - out_string = "" + uid_match = stat_info.st_uid == expected_uid + gid_match = stat_info.st_gid == expected_gid + mode_match = stat_info.st_mode & 0o7777 == expected_mode - for check_dir in [constants.PCMK_LIB, constants.PE_STATE_DIR, constants.CIB_DIR]: - flag = 0 - out_string += "##### Check perms for %s: " % check_dir - stat_info = os.stat(check_dir) - if not stat.S_ISDIR(stat_info.st_mode): - flag = 1 - out_string += "\n%s wrong type or doesn't exist\n" % check_dir - continue - if stat_info.st_uid != pwd.getpwnam('hacluster')[2] or\ - stat_info.st_gid != pwd.getpwnam('hacluster')[3] or\ - "%04o" % (stat_info.st_mode & 0o7777) != "0750": - flag = 1 - out_string += "\nwrong permissions or ownership for %s: " % check_dir - out_string += utillib.get_command_info("ls -ld %s" % check_dir)[1] + '\n' - if flag == 0: - out_string += "OK\n" - - perms_f = os.path.join(constants.WORKDIR, constants.PERMISSIONS_F) - crmutils.str2file(out_string, perms_f) - - -def collect_configurations(): - workdir = constants.WORKDIR - for conf in constants.CONFIGURATIONS: + if uid_match and gid_match and mode_match: + result = "OK" + else: + result = f"Permissions or ownership for {check_dir} are incorrect" + results.append(f"##### Check perms for {check_dir}: {result}\n") + + perms_f = os.path.join(context.work_dir, constants.PERMISSIONS_F) + crmutils.str2file(''.join(results), perms_f) + + +def dump_configurations(workdir: str) -> None: + config_list = constants.CONFIGURATIONS + config_list.append(corosync.conf()) + + for conf in config_list: if os.path.isfile(conf): shutil.copy2(conf, workdir) elif os.path.isdir(conf): shutil.copytree(conf, os.path.join(workdir, os.path.basename(conf))) -def collect_backtraces(): +def find_binary_path_for_core(core_file: str) -> str: """ - Check CORES_DIRS for core dumps within the report timeframe and - use gdb to get the backtraces + Find the binary that generated the given core file """ - cores = utillib.find_files(constants.CORES_DIRS, constants.FROM_TIME, constants.TO_TIME) - flist = [f for f in cores if "core" in os.path.basename(f)] - if flist: - utillib.print_core_backtraces(flist) - logger.debug("found backtraces: %s", ' '.join(flist)) + path_str = "" + cmd = f"gdb --batch cat {core_file}" + _, out, _ = ShellUtils().get_stdout_stderr(cmd) + if out: + res = re.search("Core was generated by `(.*)'", out, re.M) + path_str = res.group(1) if res else "" + + if path_str: + return f"Core {core_file} was generated by {path_str}" + else: + return f"Cannot find the program path for core {core_file}" -def collect_config(): - workdir = constants.WORKDIR - if os.path.isfile(constants.CONF): - shutil.copy2(constants.CONF, workdir) - if crmutils.is_process("pacemaker-controld") or crmutils.is_process("crmd"): - utillib.dump_state(workdir) - 
+def dump_core_info(workdir: str, core_file_list: List[str]) -> None:
+    """
+    Dump coredump files information into file
+    """
+    out_string = ""
+    if shutil.which("gdb"):
+        for core_file in core_file_list:
+            out_string += find_binary_path_for_core(core_file) + "\n"
+        out_string += "\nPlease utilize the gdb and debuginfo packages to obtain more detailed information locally"
     else:
-        shutil.copy2(os.path.join(constants.CIB_DIR, constants.CIB_F), workdir)
-        open(os.path.join(workdir, "STOPPED"), 'w')
-    if os.path.isfile(os.path.join(workdir, constants.CIB_F)):
-        cmd = "crm_verify -V -x %s" % os.path.join(workdir, constants.CIB_F)
-        crmutils.str2file(utillib.get_command_info(cmd)[1], os.path.join(workdir, constants.CRM_VERIFY_F))
+        msg = "Please install gdb to get more info for coredump files"
+        out_string += msg
+        logger.warning(msg)
+
+    core_f = os.path.join(workdir, constants.COREDUMP_F)
+    crmutils.str2file(out_string, core_f)
+    logger.debug(f"Dump coredump info into {utils.real_path(core_f)}")
+
+
+def collect_coredump_info(context: core.Context) -> None:
+    """
+    Collect coredump file information from the Pacemaker and Corosync library paths
+    """
+    cores = utils.find_files_in_timespan(context, context.cores_dir_list)
+    flist = [f for f in cores if "core" in os.path.basename(f)]
+    if flist:
+        logger.warning(f"Found coredump file: {flist}")
+        dump_core_info(context.work_dir, flist)
+
+
+def dump_runtime_state(workdir: str) -> None:
+    """
+    Dump runtime state files
+    """
+    cluster_shell_inst = sh.cluster_shell()
+    for cmd, f, desc in [
+        ("crm_mon -1", constants.CRM_MON_F, "cluster state"),
+        ("cibadmin -Ql", constants.CIB_F, "CIB contents"),
+        ("crm_node -p", constants.MEMBERSHIP_F, "members of this partition")
+    ]:
+        out = cluster_shell_inst.get_stdout_or_raise_error(cmd)
+        target_f = os.path.join(workdir, f)
+        crmutils.str2file(out, target_f)
+        logger.debug(f"Dump {desc} into {utils.real_path(target_f)}")
 
-def collect_dc_file():
-    if constants.SKIP_LVL:
-        return
     node = crmutils.get_dc()
-    if node and node == constants.WE:
-        open(os.path.join(constants.WORKDIR, "DC"), 'w')
+    if node and node == crmutils.this_node():
+        crmutils.str2file("", os.path.join(workdir, "DC"))
+        logger.debug(f"Current DC is {node}; Touch file 'DC' in {utils.real_path(workdir)}")
 
 
-def collect_crm_config():
-    workdir = constants.WORKDIR
-    if os.path.isfile(os.path.join(workdir, constants.CIB_F)):
-        cmd = r"CIB_file=%s/%s crm configure show" % (workdir, constants.CIB_F)
-        crmutils.str2file(utillib.get_command_info(cmd)[1], os.path.join(workdir, constants.CIB_TXT_F))
+def consume_cib_in_workdir(workdir: str) -> None:
+    """
+    Generate 'crm configure show' and 'crm_verify' outputs based on the cib.xml file in the work directory
+    """
+    cib_in_workdir = os.path.join(workdir, constants.CIB_F)
+    if os.path.isfile(cib_in_workdir):
+        cluster_shell_inst = sh.cluster_shell()
+        cmd = f"CIB_file={cib_in_workdir} crm configure show"
+        out = cluster_shell_inst.get_stdout_or_raise_error(cmd)
+        crmutils.str2file(out, os.path.join(workdir, constants.CONFIGURE_SHOW_F))
+        cmd = f"crm_verify -V -x {cib_in_workdir}"
+        out = cluster_shell_inst.get_stdout_or_raise_error(cmd)
+        crmutils.str2file(out, os.path.join(workdir, constants.CRM_VERIFY_F))
 
 
-def collect_pe_inputs():
-    from_time = constants.FROM_TIME
-    to_time = constants.TO_TIME
-    work_dir = constants.WORKDIR
-    pe_dir = constants.PE_STATE_DIR
-    logger.debug("looking for PE files in %s in %s", pe_dir, constants.WE)
-
-    flist = []
-    for f in utillib.find_files(pe_dir, from_time, to_time):
-        if re.search("[.]last$", f):
-            continue
-        flist.append(f)
+def collect_config(context: core.Context) -> None:
+    """
+    """
+    workdir = context.work_dir
 
-    if flist:
-        flist_dir = os.path.join(work_dir, os.path.basename(pe_dir))
-        utillib._mkdir(flist_dir)
-        for f in flist:
-            os.symlink(f, os.path.join(flist_dir, os.path.basename(f)))
-        logger.debug("found %d pengine input files in %s", len(flist), pe_dir)
-
-        if len(flist) <= 20:
-            if not constants.SKIP_LVL:
-                for f in flist:
-                    utillib.pe_to_dot(os.path.join(flist_dir, os.path.basename(f)))
-        else:
-            logger.debug("too many PE inputs to create dot files")
+    if ServiceManager().service_is_active("pacemaker.service"):
+        dump_runtime_state(workdir)
+        crmutils.str2file("", os.path.join(workdir, "RUNNING"))
+        logger.debug(f"Touch file 'RUNNING' in {utils.real_path(workdir)}")
     else:
-        logger.debug("Nothing found for the giving time")
+        # TODO should determine offline node was ha node
+        shutil.copy2(os.path.join(context.cib_dir, constants.CIB_F), workdir)
+        crmutils.str2file("", os.path.join(workdir, "STOPPED"))
+        logger.debug(f"Touch file 'STOPPED' in {utils.real_path(workdir)}")
+    consume_cib_in_workdir(workdir)
+    dump_configurations(workdir)
 
-def collect_sbd_info():
+
+def pe_to_dot(pe_file: str) -> None:
+    dotf = os.path.splitext(pe_file)[0] + '.dot'
+    cmd = f"{constants.PTEST} -D {dotf} -x {pe_file}"
+    code, _, _ = ShellUtils().get_stdout_stderr(cmd)
+    if code != 0:
+        logger.warning("pe_to_dot: %s -> %s failed", pe_file, dotf)
+
+
+def collect_pe_inputs(context: core.Context) -> None:
     """
-    save sbd configuration file
+    Collects PE files in the specified directory and generates DOT files if needed
     """
-    if os.path.exists(constants.SBDCONF):
-        shutil.copy2(constants.SBDCONF, constants.WORKDIR)
+    logger.debug2(f"Looking for PE files in {context.pe_dir}")
+
+    _list = utils.find_files_in_timespan(context, [context.pe_dir])
+    pe_file_list = [f for f in _list if not f.endswith(".last")]
+    if pe_file_list:
+        pe_flist_dir = os.path.join(context.work_dir, os.path.basename(context.pe_dir))
+        crmutils.mkdirp(pe_flist_dir)
+
+        gen_dot = len(pe_file_list) <= 20 and not context.speed_up
+        for f in pe_file_list:
+            pe_file_path_in_report = os.path.join(pe_flist_dir, os.path.basename(f))
+            os.symlink(f, pe_file_path_in_report)
+            if gen_dot:
+                pe_to_dot(pe_file_path_in_report)
+        logger.debug2(f"Found {len(pe_file_list)} PE files in {context.pe_dir}")
+        dump_path = f"{context.work_dir}/{os.path.basename(context.pe_dir)}"
+        logger.debug(f"Dump PE files into {utils.real_path(dump_path)}")
+    else:
+        logger.debug2("No PE file found for the given time")
+
 
-    if not utillib.which("sbd"):
+def collect_sbd_info(context: core.Context) -> None:
+    """
+    Collect SBD config file and information
+    """
+    if not os.path.exists(constants.SBDCONF):
+        logger.debug(f"SBD config file {constants.SBDCONF} does not exist")
+        return
+    shutil.copy2(constants.SBDCONF, context.work_dir)
+    if not shutil.which("sbd"):
         return
-    sbd_f = os.path.join(constants.WORKDIR, constants.SBD_F)
+
+    sbd_f = os.path.join(context.work_dir, constants.SBD_F)
     cmd = ". {};export SBD_DEVICE;{};{}".format(constants.SBDCONF, "sbd dump", "sbd list")
     with open(sbd_f, "w") as f:
-        _, out = ShellUtils().get_stdout(cmd)
         f.write("\n\n#=====[ Command ] ==========================#\n")
-        f.write("# %s\n"%(cmd))
-        f.write(out)
+        f.write(f"# {cmd}\n")
+        f.write(utils.get_cmd_output(cmd))
+
+    logger.debug(f"Dump SBD config file into {utils.real_path(sbd_f)}")
 
 
-def collect_sys_stats():
+def collect_sys_stats(context: core.Context) -> None:
+    """
+    Collect system statistics
+    """
+    cmd_list = [
+        "hostname", "uptime", "ps axf", "ps auxw", "top -b -n 1",
+        "ip addr", "ip -s link", "ip n show", "lsscsi", "lspci",
+        "mount", "cat /proc/cpuinfo", "df"
+    ]
+
     out_string = ""
-    cmd_list = ["hostname", "uptime", "ps axf", "ps auxw", "top -b -n 1",
-                "ip addr", "ip -s link", "ip n show", "lsscsi", "lspci",
-                "mount", "cat /proc/cpuinfo", "df"]
     for cmd in cmd_list:
-        out_string += "##### run \"%s\" on %s\n" % (cmd, constants.WE)
-        if cmd != "df":
-            out_string += utillib.get_command_info(cmd)[1] + '\n'
-        else:
-            out_string += utillib.get_command_info_timeout(cmd) + '\n'
-
-    sys_stats_f = os.path.join(constants.WORKDIR, constants.SYSSTATS_F)
-    crmutils.str2file(out_string, sys_stats_f)
-
-
-def collect_sys_info():
-    """
-    some basic system info and stats
-    """
-    out_string = "#####Cluster info:\n"
-    out_string += utillib.cluster_info()
-    out_string += utillib.ra_build_info()
-    out_string += utillib.booth_info()
-    out_string += "\n"
-    out_string += "#####Cluster related packages:\n"
-    out_string += utillib.pkg_versions(constants.PACKAGES)
-    if not constants.SKIP_LVL:
-        out_string += utillib.verify_packages(constants.PACKAGES)
-    out_string += "\n"
-    out_string += "#####System info:\n"
-    out_string += "Platform: %s\n" % os.uname()[0]
-    out_string += "Kernel release: %s\n" % os.uname()[2]
-    out_string += "Architecture: %s\n" % os.uname()[-1]
-    if os.uname()[0] == "Linux":
-        out_string += "Distribution: %s\n" % utillib.get_distro_info()
-
-    sys_info_f = os.path.join(constants.WORKDIR, constants.SYSINFO_F)
-    crmutils.str2file(out_string, sys_info_f)
+        out_string += f"##### Run \"{cmd}\" #####\n"
+        try:
+            out_string += utils.get_cmd_output(cmd, timeout=5) + "\n"
+        except TimeoutExpired:
+            logger.warning(f"Timeout while running command: {cmd}")
+
+    _file = os.path.join(context.work_dir, constants.SYSSTATS_F)
+    crmutils.str2file(out_string, _file)
+    logger.debug(f"Dump system statistics into {utils.real_path(_file)}")
+
+
+def collect_sys_info(context: core.Context) -> None:
+    """
+    Collect the versions of cluster-related packages and platform information
+    """
+    pkg_inst = utils.Package(constants.PACKAGES)
+    version_info = pkg_inst.version()
+    packages_info = "##### Installed cluster related packages #####\n"
+    packages_info += version_info + '\n\n'
+    if not context.speed_up:
+        packages_info += "##### Verification output of packages #####\n"
+        packages_info += pkg_inst.verify()
+
+    platform, _, release, _, arch = os.uname()
+    sys_info = (
+        f"##### System info #####\n"
+        f"Platform: {platform}\n"
+        f"Kernel release: {release}\n"
+        f"Architecture: {arch}\n"
+    )
+    if platform == "Linux":
+        sys_info += f"Distribution: {utils.get_distro_info()}\n"
+    out_string = f"{sys_info}\n{packages_info}"
+
+    _file = os.path.join(context.work_dir, constants.SYSINFO_F)
+    crmutils.str2file(out_string, _file)
+    logger.debug(f"Dump packages and platform info into {utils.real_path(_file)}")
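All of the collect_* functions above share the same calling convention: they take only the Context object and write their results into context.work_dir. The core module (later in this patch) discovers them by name prefix and runs each in a worker process. A sequential sketch of that same discovery rule, assuming a fully initialized context:

    from inspect import getmembers, isfunction
    from crmsh.report import collect

    def run_all_collectors(context):
        # Mirrors core.collect_logs_and_info(): every function in the collect
        # module whose name starts with "collect_" is found by introspection
        # and handed the shared Context (core.py uses a multiprocessing pool).
        for name, func in getmembers(collect, isfunction):
            if name.startswith("collect_"):
                func(context)
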
diff --git a/crmsh/report/constants.py b/crmsh/report/constants.py
index 4ec5418466..8d7f987c0f 100644
--- a/crmsh/report/constants.py
+++ b/crmsh/report/constants.py
@@ -1,105 +1,15 @@
 # Copyright (C) 2017 Xin Liang
 # See COPYING for license information.
-
-import socket
-from crmsh import config
-
 BIN_CRM = "/usr/sbin/crm"
-ARGOPTS_VALUE = "f:t:l:u:X:p:L:e:E:n:MSDZVsvhdQ"
-B_CONF = None
-CIB_DIR = None
-COMPRESS = config.report.compress
-COMPRESS_DATA_FLAG = "COMPRESS HB_REPORT DATA:::"
-COMPRESS_PROG = ""
-COMPRESS_EXT = ""
-CORES_DIRS = None
-CONF = None
-CRM_DAEMON_DIR = None
-CTS = ""
-DEST = ""
-DESTDIR = ""
-DO_SANITIZE = False
-SANITIZE_RULE = "passw.*"
-SANITIZE_RULE_DICT = dict()
-SANITIZE_VALUE_CIB = []
-SANITIZE_KEY_CIB = []
-SANITIZE_VALUE_RAW = []
-EXTRA_LOGS = config.report.collect_extra_logs
-FORCE_REMOVE_DEST = config.report.remove_exist_dest
-FROM_TIME = ""
-GET_STAMP_FUNC = None
-HA_DEBUGFILE = None
-HA_LOG = ""
-HA_LOGFACILITY = "daemon"
-HA_LOGFILE = None
-HA_LOGLEVEL = "info"
-HA_BIN = None
-HA_VARLIB = None
-LOCAL_SUDO = ""
+BIN_COLLECTOR = f"{BIN_CRM} report __collector"
+COMPRESS_DATA_FLAG = "COMPRESS CRM_REPORT DATA:::"
 LOG_PATTERNS = "CRIT: ERROR: error: warning: crit:"
-NO_DESCRIPTION = 1
-NO_SSH = config.report.single_node
-NODES = ""
-OCF_DIR = None
-PACKAGES = None
-PCMK_LIB = None
-PCMK_LOG = "/var/log/pacemaker/pacemaker.log /var/log/pacemaker.log"
-PE_STATE_DIR = None
 PTEST = "crm_simulate"
-SKIP_LVL = config.report.speed_up
-SLAVE = 0
-SLAVEPIDS = None
 SSH_OPTS = "-o StrictHostKeyChecking=no -o EscapeChar=none -o ConnectTimeout=15"
-SSH_PASSWORD_NODES = []
-SSH_USER = ""
-SUDO = ""
-THIS_IS_NODE = 0
-TMP = None
-TO_TIME = 0
-TRY_SSH = "root hacluster"
-# UNIQUE_MSG = "Mark:HB_REPORT:%d" % now_second
-USER_CLUSTER_TYPE = "Corosync/Pacemaker"
-USER_NODES = ""
-WE = socket.gethostname()
-WORKDIR = None
-
-
-# Important events
-#
-# Patterns format:
-# title extended_regexp
-# NB: don't use spaces in titles or regular expressions!
-EVENT_PATTERNS = """
-membership crmd.*(NEW|LOST)|pcmk.*(lost|memb|LOST|MEMB):
-quorum crmd.*Updating.quorum.status|crmd.*quorum.(lost|ac?quir)
-pause Process.pause.detected
-resources lrmd.*(start|stop)
-stonith crmd.*Exec|stonith-ng.*log_oper.*reboot|stonithd.*(requests|(Succeeded|Failed).to.STONITH|result=)
-start_stop Configuration.validated..Starting.heartbeat|Corosync.Cluster.Engine|Executive.Service.RELEASE|Requesting.shutdown|Shutdown.complete
-"""
+CHECK_LOG_LINES = 10
+STAMP_TYPE = ""
 
-PACKAGES = """pacemaker libpacemaker3 pacemaker-cli pacemaker-remote
-pacemaker-pygui pacemaker-pymgmt pymgmt-client
-openais libopenais2 libopenais3 corosync libcorosync4
-libcfg6 libcmap4 libcorosync_common4 libcpg4 libquorum5
-libsam4 libtotem_pg5 libvotequorum8
-corosync-qdevice corosync-qnetd
-resource-agents cluster-glue libglue2 ldirectord libqb0
-heartbeat heartbeat-common heartbeat-resources libheartbeat2
-booth
-sbd
-ocfs2-tools ocfs2-tools-o2cb ocfs2console
-ocfs2-kmp-default ocfs2-kmp-pae ocfs2-kmp-xen ocfs2-kmp-debug ocfs2-kmp-trace
-drbd drbd-kmp-xen drbd-kmp-pae drbd-kmp-default drbd-kmp-debug drbd-kmp-trace
-drbd-heartbeat drbd-pacemaker drbd-utils drbd-bash-completion drbd-xen
-lvm2 lvm2-clvm cmirrord
-libdlm libdlm2 libdlm3
-hawk ruby lighttpd
-kernel-default kernel-pae kernel-xen
-glibc
-"""
-
-EMAIL_TMPLATE = """
+DECRIPTION_TMPLATE = """
 Please edit this template and describe the issue/problem you
 encountered. Then, post to
 	http://clusterlabs.org/mailman/listinfo/users
@@ -109,40 +19,78 @@
 Thank you.
 Date: {0}
-By: report {1}
+By: crm report {1}
 Subject: [short problem description]
 Severity: [choose one] enhancement minor normal major critical blocking
-Component: [choose one] CRM LRM CCM RA fencing openais comm GUI tools other
 --------------------------------------------------------
 Detailed description:
 """
 
+PACKAGES = "booth cluster-glue cluster-glue-libs corosync corosync-qdevice corosync-qnetd corosync-testagents crmsh crmsh-scripts csync2 doxygen2man drbd-utils gfs2-kmp-default gfs2-utils hawk-apiserver ldirectord libcfg6 libcmap4 libcorosync_common4 libcpg4 libdlm libdlm3 libqb-tools libqb100 libquorum5 libsam4 libtotem_pg5 libvotequorum8 linstor linstor-common linstor-controller linstor-satellite monitoring-plugins-metadata o2locktop ocfs2-tools ocfs2-tools-o2cb omping pacemaker pacemaker-cli pacemaker-cts pacemaker-libs pacemaker-remote pacemaker-schemas patterns-ha pssh python-pssh python3-linstor python3-linstor-client python3-pacemaker python3-parallax resource-agents resource-agents-zfs ruby2.5-rubygem-sass-listen ruby2.5-rubygem-sass-listen-doc sbd"
 
 ANALYSIS_F = "analysis.txt"
-BT_F = "backtraces.txt"
+COREDUMP_F = "coredump_info.txt"
 CIB_F = "cib.xml"
-CIB_TXT_F = "cib.txt"
-CONFIGURATIONS = ["/etc/drbd.conf",
-                  "/etc/drbd.d",
-                  "/etc/booth/booth.conf"]
+CONFIGURE_SHOW_F = "configure_show.txt"
+CONFIGURATIONS = [
+    "/etc/drbd.conf",
+    "/etc/drbd.d",
+    "/etc/booth/booth.conf"
+]
 COROSYNC_RECORDER_F = "fdata.txt"
+COROSYNC_F = "corosync.conf"
 CRM_MON_F = "crm_mon.txt"
 CRM_VERIFY_F = "crm_verify.txt"
 DESCRIPTION_F = "description.txt"
 DLM_DUMP_F = "dlm_dump.txt"
-HALOG_F = "ha-log.txt"
-HB_UUID_F = "hb_uuid.txt"
-HOSTCACHE = "hostcache"
 JOURNAL_F = "journal.log"
+JOURNAL_PCMK_F = "journal_pacemaker.log"
+JOURNAL_COROSYNC_F = "journal_corosync.log"
+JOURNAL_SBD_F = "journal_sbd.log"
 MEMBERSHIP_F = "members.txt"
 PERMISSIONS_F = "permissions.txt"
 SBDCONF = "/etc/sysconfig/sbd"
+PCMKCONF = "/etc/sysconfig/pacemaker"
 SYSINFO_F = "sysinfo.txt"
 SYSSTATS_F = "sysstats.txt"
 TIME_F = "time.txt"
 OCFS2_F = "ocfs2.txt"
 SBD_F = "sbd.txt"
 OSRELEASE = "/etc/os-release"
+TIME_FORMAT = "%Y-%m-%d %H:%M:%S"
+RESULT_TIME_SUFFIX = "%a-%d-%b-%Y"
+NAME = "crm report"
+COROSYNC_LIB = "/var/lib/corosync"
+
+EXTRA_HELP = '''
+Examples
+  # collect from 2pm, today
+  {name} -f 2pm report_1
+
+  # collect from "2007/9/5 12:30" to "2007/9/5 14:00"
+  {name} -f "2007/9/5 12:30" -t "2007/9/5 14:00" report_2
+
+  # collect from 1:00 to 3:00, today; include /var/log/cluster/ha-debug as extra log
+  {name} -f 1:00 -t 3:00 -E /var/log/cluster/ha-debug report_3
+
+  # collect from "09sep07 2:00" and use 'hacluster' as ssh user
+  {name} -f "09sep07 2:00" -u hacluster report_4
+
+  # collect from 18:00, today; replace sensitive message like "usern.*" or "admin.*"
+  {name} -f 18:00 -s -p "usern.*" -p "admin.*" report_5
+
+  # collect from 1 month ago
+  {name} -f 1m
+
+  # collect from 75 hours ago
+  {name} -f 75H
+
+  # collect from 10 minutes ago
+  {name} -f 10M
+
+  # collect from 2 days ago to 1 day ago
+  {name} -f 2d -t 1d
+'''.format(name=NAME)
 # vim:ts=4:sw=4:et:
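One detail worth illustrating before the core module below: RESULT_TIME_SUFFIX is the strftime pattern used to derive the default report name when no destination is given (see process_dest() in core.py). A minimal sketch, assuming utils.now() is a thin strftime wrapper; the printed date is simply whatever "now" happens to be:

    import datetime

    suffix = datetime.datetime.now().strftime("%a-%d-%b-%Y")  # RESULT_TIME_SUFFIX
    dest = f"crm_report-{suffix}"
    print(dest)  # e.g. crm_report-Mon-16-Oct-2023
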
diff --git a/crmsh/report/core.py b/crmsh/report/core.py
index 9072a67f33..f2ef23cc4b 100644
--- a/crmsh/report/core.py
+++ b/crmsh/report/core.py
@@ -2,427 +2,489 @@
 # Copyright (C) 2017 Xin Liang
 # See COPYING for license information.
-import getopt
+import argparse
 import multiprocessing
 import os
 import re
 import sys
-import datetime
 import shutil
+import json
+from inspect import getmembers, isfunction
+from typing import List
 
 from crmsh import utils as crmutils
-from crmsh import config, log, userdir
-from crmsh.report import constants, utillib
+from crmsh import config, log, userdir, corosync, tmpfiles, ui_cluster, sh
 from crmsh.sh import ShellUtils
 
 logger = log.setup_report_logger(__name__)
 
 
-def collect_for_nodes(nodes, arg_str):
+class Context:
     """
-    Start slave collectors
+    Class to set/get essential attributes during the whole crm report process
+    """
+    def load(self) -> None:
+        """
+        Load default values
+        """
+        self.name = "crm_report"
+        self.from_time: float = config.report.from_time
+        self.to_time: float = utils.now()
+        self.no_compress: bool = not config.report.compress
+        self.speed_up: bool = config.report.speed_up
+        self.extra_log_list: List[str] = config.report.collect_extra_logs.split()
+        self.rm_exist_dest: bool = config.report.remove_exist_dest
+        self.single: bool = config.report.single_node
+        self.sensitive_regex_list: List[str] = []
+        self.regex_list: List[str] = "CRIT: ERROR: error: warning: crit:".split()
+        self.ssh_askpw_node_list: List[str] = []
+        self.me = crmutils.this_node()
+        self.pe_dir: str
+        self.cib_dir: str
+        self.pcmk_lib_dir: str
+        self.pcmk_exec_dir: str
+        self.cores_dir_list: List[str]
+        self.dest: str
+        self.dest_dir: str
+        self.work_dir: str
+        self.node_list: List[str]
+        self.ssh_user: str
+        self.ssh_option_list: List[str]
+        self.no_log_list: List[str]
+        self.sanitize: bool
+        self.debug: int
+        self.compress_prog: str
+        self.compress_suffix: str
+        self.main_node = self.me
+
+    def __str__(self) -> str:
+        return json.dumps(self.__dict__)
+
+    def __setattr__(self, name: str, value) -> None:
+        """
+        Set the attribute value and perform validations
+        """
+        if name in ["from_time", "to_time"] and value:
+            value = utils.parse_to_timestamp(value)
+        if name == "extra_log_list" and value and hasattr(self, "extra_log_list"):
+            value = list(set(self.extra_log_list) | set(value))
+        super().__setattr__(name, value)
+
+    def __setitem__(self, key: str, value) -> None:
+        self.__dict__[key] = value
+
+
+from crmsh.report import constants, utils, collect
+
+
+def add_arguments() -> argparse.Namespace:
+    parser = argparse.ArgumentParser(
+        usage=f"{constants.NAME} [options] [dest]",
+        add_help=False,
+        formatter_class=lambda prog: argparse.HelpFormatter(prog, width=80)
+    )
+    parser.add_argument("-h", "--help", action="store_true", dest="help",
+                        help="Show this help message and exit")
+    parser.add_argument('-f', dest='from_time', metavar='FROM_TIME',
+                        help='Time to start from (default: 12 hours before)')
+    parser.add_argument('-t', dest='to_time', metavar='TO_TIME',
+                        help='Time to finish at (default: now)')
+    parser.add_argument('-d', dest='no_compress', action='store_true',
+                        help="Don't compress, but leave result in a directory")
+    parser.add_argument('-n', dest='node_list', metavar='NODE', action=ui_cluster.CustomAppendAction, default=[],
+                        help='Node names for this cluster; this option is additive (use -n a -n b or -n "a b")')
+    parser.add_argument('-u', dest='ssh_user', metavar='SSH_USER',
+                        help='SSH user to access other nodes')
+    parser.add_argument('-X', dest='ssh_option_list', metavar='SSH_OPTION', action=ui_cluster.CustomAppendAction, default=[],
+                        help='Extra ssh(1) options; this option is additive')
+    parser.add_argument('-E', dest='extra_log_list', metavar='FILE', action=ui_cluster.CustomAppendAction, default=[],
+                        help='Extra logs to collect; this option is additive')
+    parser.add_argument('-e', dest='no_log_list', metavar='FILE', action=ui_cluster.CustomAppendAction, default=[],
+                        help='Don\'t collect these files; this option is additive')
+    parser.add_argument('-s', dest='sanitize', action='store_true',
+                        help='Replace sensitive info in PE or CIB or pacemaker log files')
+    parser.add_argument('-p', dest='sensitive_regex_list', metavar='PATT', action=ui_cluster.CustomAppendAction, default=[],
+                        help='Regular expression to match variables containing sensitive data (default: passw.*); this option is additive')
+    parser.add_argument('-Q', dest='speed_up', action='store_true',
+                        help="The quick mode, which skips producing dot files from PE inputs, verifying installed cluster stack rpms and sanitizing files for sensitive information")
+    parser.add_argument('-Z', dest='rm_exist_dest', action='store_true',
+                        help='If destination directories exist, remove them instead of exiting')
+    parser.add_argument('-S', dest='single', action='store_true',
+                        help="Single node operation; don't try to start report collectors on other nodes")
+    parser.add_argument('-v', dest='debug', action='count', default=0,
+                        help='Increase verbosity')
+    parser.add_argument('dest', nargs='?',
+                        help='Report name (may include path where to store the report)')
+
+    args = parser.parse_args()
+    if args.help:
+        parser.print_help()
+        print(constants.EXTRA_HELP)
+        sys.exit(0)
+
+    return args
+
+
+def push_data(context: Context) -> None:
+    """
+    Push data from this node
+    """
+    logger.debug2(f"Pushing data from {context.me}:{context.work_dir} to {context.main_node}")
+    cmd = f'cd {context.work_dir}/.. && tar -h -c {context.me}'
+    _, out, err = ShellUtils().get_stdout_stderr(cmd, raw=True)
+    if out:
+        print(f"{constants.COMPRESS_DATA_FLAG}{out}")
+    if err:
+        raise utils.ReportGenericError(crmutils.to_ascii(err))
+
+
+def pick_compress_prog(context: Context) -> None:
+    """
+    Pick the appropriate compression program and its file suffix
+    """
+    context.compress_prog, context.compress_suffix = pick_first_compress()
+    if not context.compress_prog:
+        context.compress_prog, context.compress_suffix = "cat", ""
+
+
+def pick_first_compress():
+    compress_prog_suffix_dict = {
+        "gzip": ".gz",
+        "bzip2": ".bz2",
+        "xz": ".xz"
+    }
+    for cmd, suffix in compress_prog_suffix_dict.items():
+        if shutil.which(cmd):
+            return cmd, suffix
+    logger.warning("Could not find a compression program")
+    return None, None
+
+
+def finalword(context: Context) -> None:
+    logger.info(f"The report is saved in {context.dest_path}")
+    timespan_str = utils.get_timespan_str(context)
+    logger.info(f"Report timespan: {timespan_str}")
+    nodes_str = ' '.join(context.node_list)
+    logger.info(f"Including nodes: {nodes_str}")
+    logger.info("Thank you for taking time to create this report")
+
+
+def process_results(context: Context) -> None:
+    """
+    Process report results
+    """
+    if not context.speed_up:
+        utils.do_sanitize(context)
+    utils.analyze(context)
+    utils.create_description_template(context)
+
+    if context.no_compress:
+        shutil.move(context.work_dir, context.dest_dir)
+    else:
+        cmd_cd_tar = f"(cd {context.work_dir}/.. && tar cf - {context.dest})"
+        cmd_compress = f"{context.compress_prog} > {context.dest_dir}/{context.dest}.tar{context.compress_suffix}"
+        cmd = f"{cmd_cd_tar}|{cmd_compress}"
+        logger.debug2(f"Running: {cmd}")
+        sh.cluster_shell().get_stdout_or_raise_error(cmd)
+
+    finalword(context)
+
+
+def collect_logs_and_info(context: Context) -> None:
+    """
+    Collect logs and information using multiprocessing
+    """
+    # Make sure not to occupy all CPUs
+    pool = multiprocessing.Pool(round(0.8 * multiprocessing.cpu_count()))
+    # result_list stores the AsyncResult objects returned from apply_async.
+    # Calling their get() method re-raises exceptions like NameError, AttributeError, etc.
+    # from the child processes; otherwise the parent process would never see them.
+    # Since get() blocks until the child process finishes, the results are first
+    # collected in a list and only queried after all workers have been started.
+    result_list = []
+    # Generate function list from collect.py
+    for cf in [f for f, _ in getmembers(collect, isfunction) if f.startswith("collect_")]:
+        result = pool.apply_async(getattr(collect, cf), (context,))
+        result_list.append(result)
+    pool.close()
+    pool.join()
+
+    for result in result_list:
+        try:
+            result.get()
+        except:
+            utils.print_traceback()
+
+
+def collect_for_nodes(context: Context) -> None:
+    """
+    Start collectors on each node
     """
     process_list = []
-    for node in nodes.split():
-        if node in constants.SSH_PASSWORD_NODES:
-            logger.info("Please provide password for %s at %s", utillib.say_ssh_user(), node)
-            logger.info("Note that collecting data will take a while.")
-            utillib.start_slave_collector(node, arg_str)
+    for node in context.node_list:
+        if node in context.ssh_askpw_node_list:
+            node_str = f"{context.ssh_user}@{node}" if context.ssh_user else node
+            logger.info(f"Please provide password for {node_str}")
+            start_collector(node, context)
         else:
-            p = multiprocessing.Process(target=utillib.start_slave_collector, args=(node, arg_str))
+            p = multiprocessing.Process(target=start_collector, args=(node, context))
             p.start()
             process_list.append(p)
     for p in process_list:
-        p.join()
-
-def dump_env():
-    """
-    this is how we pass environment to other hosts
-    """
-    env_dict = {}
-    env_dict["DEST"] = constants.DEST
-    env_dict["FROM_TIME"] = constants.FROM_TIME
-    env_dict["TO_TIME"] = constants.TO_TIME
-    env_dict["USER_NODES"] = constants.USER_NODES
-    env_dict["NODES"] = constants.NODES
-    env_dict["HA_LOG"] = constants.HA_LOG
-    # env_dict["UNIQUE_MSG"] = constants.UNIQUE_MSG
-    env_dict["SANITIZE_RULE_DICT"] = constants.SANITIZE_RULE_DICT
-    env_dict["DO_SANITIZE"] = constants.DO_SANITIZE
-    env_dict["SKIP_LVL"] = constants.SKIP_LVL
-    env_dict["EXTRA_LOGS"] = constants.EXTRA_LOGS
-    env_dict["PCMK_LOG"] = constants.PCMK_LOG
-    env_dict["VERBOSITY"] = int(config.report.verbosity) or (1 if config.core.debug else 0)
-
-    res_str = ""
-    for k, v in env_dict.items():
-        res_str += " {}={}".format(k, v)
-    return res_str
-
-def get_log():
-    """
-    get the right part of the log
-    """
-    outf = os.path.join(constants.WORKDIR, constants.HALOG_F)
-
-    # collect journal from systemd unless -M was passed
-    if constants.EXTRA_LOGS:
-        utillib.collect_journal(constants.FROM_TIME,
-                                constants.TO_TIME,
-                                os.path.join(constants.WORKDIR, constants.JOURNAL_F))
-
-    if constants.HA_LOG and not os.path.isfile(constants.HA_LOG):
-        if not is_collector():  # warning if not on slave
-            logger.warning("%s not found; we will try to find log ourselves", constants.HA_LOG)
-        constants.HA_LOG = ""
-    if not constants.HA_LOG:
-        constants.HA_LOG = utillib.find_log()
-    if (not constants.HA_LOG) or (not os.path.isfile(constants.HA_LOG)):
-        if constants.CTS:
-            pass  # TODO
-        else:
-            logger.warning("not log at %s", constants.WE)
-        return
+        p.join()
+
+
+def start_collector(node: str, context: Context) -> None:
+    """
+    Start collector at specific node
+    """
+    cmd = f"{constants.BIN_COLLECTOR} '{context}'"
+    err = ""
 
-    if constants.CTS:
-        pass  # TODO
+    if node == context.me:
+        code, out, err = ShellUtils().get_stdout_stderr(cmd)
     else:
-        try:
-            getstampproc = utillib.find_getstampproc(constants.HA_LOG)
-        except PermissionError:
-            return
-        if getstampproc:
-            constants.GET_STAMP_FUNC = getstampproc
-            if utillib.dump_logset(constants.HA_LOG, constants.FROM_TIME, constants.TO_TIME, outf):
-                utillib.log_size(constants.HA_LOG, outf+'.info')
+        node = f"{context.ssh_user}@{node}" if context.ssh_user else node
+        cmd = cmd.replace('"', '\\"')
+        cmd = f'ssh {constants.SSH_OPTS} {node} "{context.sudo} {cmd}"'
+        code, out, err = sh.LocalShell().get_rc_stdout_stderr(context.ssh_user, cmd)
+
+    if code != 0:
+        logger.warning(err)
+    # ERROR/WARNING/DEBUG messages
+    if err:
+        print(err, file=sys.stderr)
+    if out == '':
+        return
+
+    compress_data = ""
+    for data in out.split("\n"):
+        if data.startswith(constants.COMPRESS_DATA_FLAG):
+            # crm report data from collector
+            compress_data = data[len(constants.COMPRESS_DATA_FLAG):]
         else:
-            logger.warning("could not figure out the log format of %s", constants.HA_LOG)
+            # log data from collector
+            print(data)
+    cmd = f"cd {context.work_dir} && tar x"
+    ShellUtils().get_stdout(cmd, input_s=eval(compress_data))
 
-def is_collector():
+
+def process_dest(context: Context) -> None:
     """
-    the instance where user runs crm report is the master
-    the others are slaves
+    Process destination path and file
     """
-    if len(sys.argv) > 1 and sys.argv[1] == "__slave":
-        return True
-    return False
-
-
-def load_env(env_str):
-    list_ = []
-    for tmp in env_str.split():
-        if re.search('=', tmp):
-            item = tmp
+    if not context.dest:
+        suffix = utils.now(constants.RESULT_TIME_SUFFIX)
+        context.dest = f"{context.name}-{suffix}"
+
+    dest_dir = os.path.dirname(context.dest) or "."
+    if not os.path.isdir(dest_dir):
+        raise utils.ReportGenericError(f"Directory {dest_dir} does not exist")
+    context.dest_dir = dest_dir
+
+    dest_file = os.path.basename(context.dest)
+    if not crmutils.is_filename_sane(dest_file):
+        raise utils.ReportGenericError(f"{dest_file} is invalid file name")
+
+    if context.no_compress and os.path.isdir(context.dest):
+        if context.rm_exist_dest:
+            shutil.rmtree(context.dest)
         else:
-            list_.remove(item)
-            item += " %s" % tmp
-        list_.append(item)
-
-    env_dict = {}
-    env_dict = crmutils.nvpairs2dict(list_)
-    constants.DEST = env_dict["DEST"]
-    constants.FROM_TIME = float(env_dict["FROM_TIME"])
-    constants.TO_TIME = float(env_dict["TO_TIME"])
-    constants.USER_NODES = env_dict["USER_NODES"]
-    constants.NODES = env_dict["NODES"]
-    constants.HA_LOG = env_dict["HA_LOG"]
-    # constants.UNIQUE_MSG = env_dict["UNIQUE_MSG"]
-    constants.SANITIZE_RULE_DICT = env_dict["SANITIZE_RULE_DICT"]
-    constants.DO_SANITIZE = env_dict["DO_SANITIZE"]
-    constants.SKIP_LVL = utillib.str_to_bool(env_dict["SKIP_LVL"])
-    constants.EXTRA_LOGS = env_dict["EXTRA_LOGS"]
-    constants.PCMK_LOG = env_dict["PCMK_LOG"]
-    config.report.verbosity = env_dict["VERBOSITY"]
-
-
-def parse_argument(argv):
-    try:
-        opt, arg = getopt.getopt(argv[1:], constants.ARGOPTS_VALUE)
-    except getopt.GetoptError:
-        usage("short")
-
-    if len(arg) == 0:
-        constants.DESTDIR = "."
-        constants.DEST = "crm_report-%s" % datetime.datetime.now().strftime('%a-%d-%b-%Y')
-    elif len(arg) == 1:
-        constants.TMP = arg[0]
+            raise utils.ReportGenericError(f"Destination directory {context.dest} exists, please cleanup or use -Z option")
+
+    context.dest = dest_file
+    pick_compress_prog(context)
+    if context.no_compress:
+        context.dest_path = f"{context.dest_dir}/{context.dest}"
     else:
-        usage("short")
-
-    verbosity = 0
-    for args, option in opt:
-        if args == '-h':
-            usage()
-        if args == "-V":
-            version()
-        if args == '-f':
-            constants.FROM_TIME = crmutils.parse_to_timestamp(option)
-            utillib.check_time(constants.FROM_TIME, option)
-        if args == '-t':
-            constants.TO_TIME = crmutils.parse_to_timestamp(option)
-            utillib.check_time(constants.TO_TIME, option)
-        if args == "-n":
-            constants.USER_NODES += " %s" % option
-        if args == "-u":
-            constants.SSH_USER = option
-        if args == "-X":
-            constants.SSH_OPTS += " %s" % option
-        if args == "-l":
-            constants.HA_LOG = option
-        if args == "-e":
-            constants.EDITOR = option
-        if args == "-p":
-            constants.SANITIZE_RULE += " %s" % option
-        if args == "-s":
-            constants.DO_SANITIZE = True
-        if args == "-Q":
-            constants.SKIP_LVL = True
-        if args == "-L":
-            constants.LOG_PATTERNS += " %s" % option
-        if args == "-S":
-            constants.NO_SSH = True
-        if args == "-D":
-            constants.NO_DESCRIPTION = 1
-        if args == "-Z":
-            constants.FORCE_REMOVE_DEST = True
-        if args == "-M":
-            constants.EXTRA_LOGS = ""
-        if args == "-E":
-            constants.EXTRA_LOGS += " %s" % option
-        if args == "-v":
-            verbosity += 1
-        if args == '-d':
-            constants.COMPRESS = False
-
-    config.report.verbosity = verbosity
-
-    if config.report.sanitize_rule:
-        constants.DO_SANITIZE = True
-        temp_pattern_set = set()
-        temp_pattern_set |= set(re.split('\s*\|\s*|\s+', config.report.sanitize_rule.strip('|')))
-        constants.SANITIZE_RULE += " {}".format(' '.join(temp_pattern_set))
-    utillib.parse_sanitize_rule(constants.SANITIZE_RULE)
-
-    if not constants.FROM_TIME:
-        from_time = config.report.from_time
-        if re.search("^-[1-9][0-9]*[YmdHM]$", from_time):
-            number = int(re.findall("[1-9][0-9]*", from_time)[0])
-            if re.search("^-[1-9][0-9]*Y$", from_time):
-                timedelta = datetime.timedelta(days = number * 365)
-            if re.search("^-[1-9][0-9]*m$", from_time):
-                timedelta = datetime.timedelta(days = number * 30)
-            if re.search("^-[1-9][0-9]*d$", from_time):
-                timedelta = datetime.timedelta(days = number)
-            if re.search("^-[1-9][0-9]*H$", from_time):
-                timedelta = datetime.timedelta(hours = number)
-            if re.search("^-[1-9][0-9]*M$", from_time):
-                timedelta = datetime.timedelta(minutes = number)
-            from_time = (datetime.datetime.now() - timedelta).strftime("%Y-%m-%d %H:%M")
-            constants.FROM_TIME = crmutils.parse_to_timestamp(from_time)
-            utillib.check_time(constants.FROM_TIME, from_time)
-        else:
-            utillib.log_fatal("Wrong format for from_time in /etc/crm/crm.conf; (-[1-9][0-9]*[YmdHM])")
+        context.dest_path = f"{context.dest_dir}/{context.dest}.tar{context.compress_suffix}"
+
+
+def process_node_list(context: Context) -> None:
+    if not context.node_list:
+        context.node_list = crmutils.list_cluster_nodes()
+    if not context.node_list:
+        raise utils.ReportGenericError("Could not figure out a list of nodes; is this a cluster node?")
+    if context.single:
+        context.node_list = [context.me]
+
+    for node in context.node_list[:]:
+        if node == context.me:
+            continue
+        try:
+            crmutils.ping_node(node)
+        except Exception as err:
+            logger.error(str(err))
+            context.node_list.remove(node)
 
-def run():
+def process_arguments(context: Context) -> None:
Context) -> None:
+    if context.to_time <= context.from_time:
+        raise ValueError("The start time must be before the finish time")
+    process_node_list(context)
+    process_dest(context)
 
-    utillib.check_env()
-    tmpdir = utillib.make_temp_dir()
-    utillib.add_tempfiles(tmpdir)
-
-    #
-    # get and check options; and the destination
-    #
+
+def setup_workdir(context: Context) -> None:
+    """
+    Set up the working directory where crm report puts all collected data
+    """
+    tmpdir = tmpfiles.create_dir()
     if not is_collector():
-        parse_argument(sys.argv)
-        set_dest(constants.TMP)
-        constants.WORKDIR = os.path.join(tmpdir, constants.DEST)
+        context.work_dir = os.path.join(tmpdir, os.path.basename(context.dest))
     else:
-        constants.WORKDIR = os.path.join(tmpdir, constants.DEST, constants.WE)
-    utillib._mkdir(constants.WORKDIR)
+        context.work_dir = os.path.join(tmpdir,
+                                        os.path.basename(context.dest),
+                                        context.me)
+    crmutils.mkdirp(context.work_dir)
+    logger.debug2(f"Setup work directory in {context.work_dir}")
 
-    if is_collector():
-        load_env(' '.join(sys.argv[2:]))
-
-    utillib.compatibility_pcmk()
-    if constants.CTS == "" or is_collector():
-        utillib.get_log_vars()
+
+def load_context(context: Context) -> None:
+    """
+    Load context attributes passed down from the master process
+    """
+    for key, value in json.loads(sys.argv[2]).items():
+        context[key] = value
+    context.me = crmutils.this_node()
+    adjust_verbosity(context)
+    logger.debug2(f"Loading context from collector: {context}")
 
-    if not is_collector():
-        constants.NODES = ' '.join(utillib.get_nodes())
-        logger.debug("nodes: %s", constants.NODES)
-        if constants.NODES == "":
-            utillib.log_fatal("could not figure out a list of nodes; is this a cluster node?")
-        if constants.WE in constants.NODES.split():
-            constants.THIS_IS_NODE = 1
 
-    if not is_collector():
-        if constants.THIS_IS_NODE != 1:
-            logger.warning("this is not a node and you didn't specify a list of nodes using -n")
-    #
-    # ssh business
-    #
-    if not constants.NO_SSH:
-        # if the ssh user was supplied, consider that it
-        # works; helps reduce the number of ssh invocations
-        utillib.find_ssh_user()
-
-    #
-    # find the logs and cut out the segment for the period
-    #
-    if constants.THIS_IS_NODE == 1:
-        get_log()
+def find_ssh_user(context: Context) -> None:
+    """
+    Find the SSH user that allows passwordless access to the nodes in node_list
+    """
+    ssh_user = ""
+    user_try_list = [
+        context.ssh_user,
+        userdir.get_sudoer(),
+        userdir.getuser()
+    ]
+
+    for n in context.node_list:
+        if n == context.me:
+            continue
+        rc = False
+        for u in user_try_list:
+            if not u:
+                continue
+            ssh_str = f"{u}@{n}"
+            if not crmutils.check_ssh_passwd_need(u, u, n):
+                logger.debug(f"ssh {ssh_str} OK")
+                ssh_user = u
+                rc = True
+                break
+            else:
+                logger.debug(f"ssh {ssh_str} failed")
+        if not rc:
+            context.ssh_askpw_node_list.append(n)
+    if context.ssh_askpw_node_list:
+        logger.warning(f"passwordless ssh to node(s) {context.ssh_askpw_node_list} does not work")
+
+    context.sudo = "" if ssh_user in ("root", "hacluster") else "sudo"
+    context.ssh_user = ssh_user or ""
+    logger.debug2(f"context.ssh_user is {context.ssh_user}")
+
+
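`find_ssh_user` above probes candidate users per node in priority order: an explicitly supplied user, then the sudoer, then the current user; nodes where no candidate works are queued for a later password prompt. A condensed sketch of that selection loop, with a hypothetical `needs_password` callback standing in for `crmutils.check_ssh_passwd_need` (which returns True when a password would be required):

```python
from typing import List, Optional, Tuple


def pick_ssh_user(
        candidates: List[Optional[str]],
        nodes: List[str],
        me: str,
        needs_password,
) -> Tuple[str, List[str]]:
    """Return (last working user, nodes where no candidate worked)."""
    chosen, ask_pass = "", []
    for node in nodes:
        if node == me:
            continue                             # no ssh needed to reach ourselves
        for user in filter(None, candidates):    # skip unset candidates
            if not needs_password(user, node):   # passwordless login works
                chosen = user
                break
        else:                                    # no break: every candidate failed
            ask_pass.append(node)
    return chosen, ask_pass


# Try an explicit user first, then the sudoer, then the current user.
users = ["admin", None, "alice"]
ok = lambda user, node: user != "alice"          # pretend only alice has keys
print(pick_ssh_user(users, ["node1", "node2"], "node1", ok))  # ('alice', [])
```

The `for`/`else` clause plays the role of the `rc` flag in the patch: the `else` branch runs only when the inner loop finishes without a `break`.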
+def load_from_crmsh_config(context: Context) -> None:
+    """
+    Load context attributes from crmsh.config
+    """
+    config_context_map = {
+        "crm_config": "cib_dir",
+        "crm_daemon_dir": "pcmk_exec_dir",
+        "pe_state_dir": "pe_dir"
+    }
+    context_str_map = {
+        "cib_dir": "CIB",
+        "pcmk_exec_dir": "Pacemaker exec",
+        "pe_dir": "PE"
+    }
+    for config_item, context_attr in config_context_map.items():
+        value = getattr(config.path, config_item, None)
+        if not value or not os.path.isdir(value):
+            raise utils.ReportGenericError(f"Cannot find {context_str_map[context_attr]} directory")
+        setattr(context, context_attr, value)
+
+
+def load_context_attributes(context: Context) -> None:
+    """
+    Load context attributes from crmsh.config and corosync.conf
+    """
+    load_from_crmsh_config(context)
 
-    if not is_collector():
-        arg_str = dump_env()
-        if not constants.NO_SSH:
-            collect_for_nodes(constants.NODES, arg_str)
-        elif constants.THIS_IS_NODE == 1:
-            collect_for_nodes(constants.WE, arg_str)
-
-    #
-    # endgame:
-    #     slaves tar their results to stdout, the master waits
-    #     for them, analyses results, asks the user to edit the
-    #     problem description template, and prints final notes
-    #
-    if is_collector():
-        utillib.collect_info()
-        cmd = r"cd %s/.. && tar -h -cf - %s" % (constants.WORKDIR, constants.WE)
-        code, out, err = ShellUtils().get_stdout_stderr(cmd, raw=True)
-        print("{}{}".format(constants.COMPRESS_DATA_FLAG, out))
-    else:
-        p_list = []
-        p_list.append(multiprocessing.Process(target=utillib.analyze))
-        p_list.append(multiprocessing.Process(target=utillib.events, args=(constants.WORKDIR,)))
-        for p in p_list:
-            p.start()
+    context.pcmk_lib_dir = os.path.dirname(context.cib_dir)
+    context.cores_dir_list = [os.path.join(context.pcmk_lib_dir, "cores")]
+    context.cores_dir_list.extend([constants.COROSYNC_LIB] if os.path.isdir(constants.COROSYNC_LIB) else [])
 
-        utillib.check_if_log_is_empty()
-        utillib.mktemplate(sys.argv)
-        for p in p_list:
-            p.join()
+
+def adjust_verbosity(context: Context) -> None:
+    if context.debug > 0:
+        config.report.verbosity = context.debug
+    elif config.core.debug:
+        config.report.verbosity = 1
+        context.debug = 1
 
-    if not constants.SKIP_LVL:
-        utillib.sanitize()
-    if constants.COMPRESS:
-        utillib.pick_compress()
-        cmd = r"(cd %s/.. 
&& tar cf - %s)|%s > %s/%s.tar%s" % ( - constants.WORKDIR, constants.DEST, constants.COMPRESS_PROG, - constants.DESTDIR, constants.DEST, constants.COMPRESS_EXT) - crmutils.ext_cmd(cmd) - else: - shutil.move(constants.WORKDIR, constants.DESTDIR) - utillib.finalword() +def parse_arguments(context: Context) -> None: + """ + Add, parse and process arguments + """ + args = add_arguments() + crmutils.check_space_option_value(args) + for arg in vars(args): + value = getattr(args, arg) + if value or not hasattr(context, arg): + setattr(context, arg, value) + adjust_verbosity(context) + process_arguments(context) -def set_dest(dest): +def is_collector() -> bool: """ - default DEST has already been set earlier (if the - argument is missing) + collector is for collecting logs and data + """ + return len(sys.argv) > 1 and sys.argv[1] == "__collector" + + +def run_impl() -> None: + """ + Major work flow + """ + ctx = Context() + + if is_collector(): + load_context(ctx) + else: + ctx.load() + parse_arguments(ctx) + load_context_attributes(ctx) + + setup_workdir(ctx) + + if is_collector(): + collect_logs_and_info(ctx) + push_data(ctx) + else: + find_ssh_user(ctx) + collect_for_nodes(ctx) + process_results(ctx) + + +def run() -> None: + """ + crm report entry """ - if dest: - constants.DESTDIR = utillib.get_dirname(dest) - constants.DEST = os.path.basename(dest) - if not os.path.isdir(constants.DESTDIR): - utillib.log_fatal("%s is illegal directory name" % constants.DESTDIR) - if not crmutils.is_filename_sane(constants.DEST): - utillib.log_fatal("%s contains illegal characters" % constants.DEST) - if not constants.COMPRESS and os.path.isdir(os.path.join(constants.DESTDIR, constants.DEST)): - if constants.FORCE_REMOVE_DEST: - shutil.rmtree(os.path.join(constants.DESTDIR, constants.DEST)) - else: - utillib.log_fatal("destination directory DESTDIR/DEST exists, please cleanup or use -Z") - - -def usage(short_msg=''): - print(""" -usage: report -f {time} [-t time] - [-u user] [-X ssh-options] [-l file] [-n nodes] [-E files] - [-p patt] [-L patt] [-e prog] [-MSDZQVsvhd] [dest] - - -f time: time to start from - -t time: time to finish at (dflt: now) - -d : don't compress, but leave result in a directory - -n nodes: node names for this cluster; this option is additive - (use either -n "a b" or -n a -n b) - if you run report on the loghost or use autojoin, - it is highly recommended to set this option - -u user: ssh user to access other nodes (dflt: empty, root, hacluster) - -X ssh-options: extra ssh(1) options - -l file: log file - -E file: extra logs to collect; this option is additive - (dflt: /var/log/messages) - -s : sanitize the PE and CIB files - -p patt: regular expression to match variables containing sensitive data; - this option is additive (dflt: "passw.*") - -L patt: regular expression to match in log files for analysis; - this option is additive (dflt: CRIT: ERROR:) - -e prog: your favourite editor - -Q : don't run resource intensive operations (speed up) - -M : don't collect extra logs (/var/log/messages) - -D : don't invoke editor to write description - -Z : if destination directories exist, remove them instead of exiting - (this is default for CTS) - -S : single node operation; don't try to start report - collectors on other nodes - -v : increase verbosity - -V : print version - dest : report name (may include path where to store the report) - """) - if short_msg != "short": - print(""" - . the multifile output is stored in a tarball {dest}.tar.bz2 - . 
the time specification is as in either Date::Parse or - Date::Manip, whatever you have installed; Date::Parse is - preferred - . we try to figure where is the logfile; if we can't, please - clue us in ('-l') - . we collect only one logfile and /var/log/messages; if you - have more than one logfile, then use '-E' option to supply - as many as you want ('-M' empties the list) - - Examples - - report -f 2pm report_1 - report -f "2007/9/5 12:30" -t "2007/9/5 14:00" report_2 - report -f 1:00 -t 3:00 -l /var/log/cluster/ha-debug report_3 - report -f "09-sep-07 2:00" -u hbadmin report_4 - report -f 18:00 -p "usern.*" -p "admin.*" report_5 - - . WARNING . WARNING . WARNING . WARNING . WARNING . WARNING . - - We won't sanitize the CIB and the peinputs files, because - that would make them useless when trying to reproduce the - PE behaviour. You may still choose to obliterate sensitive - information if you use the -s and -p options, but in that - case the support may be lacking as well. The logs and the - crm_mon, ccm_tool, and crm_verify output are *not* sanitized. - - Additional system logs (/var/log/messages) are collected in - order to have a more complete report. If you don't want that - specify -M. - - IT IS YOUR RESPONSIBILITY TO PROTECT THE DATA FROM EXPOSURE! - """) - sys.exit(1) - - -def version(): - print(utillib.crmsh_info().strip('\n')) - sys.exit(0) - - -if __name__ == "__main__": try: - run() + run_impl() except UnicodeDecodeError: - import traceback - traceback.print_exc() - sys.stdout.flush() - + utils.print_traceback() + sys.exit(1) + except utils.ReportGenericError as err: + if str(err): + logger.error(str(err)) + sys.exit(1) # vim:ts=4:sw=4:et: diff --git a/crmsh/report/utillib.py b/crmsh/report/utillib.py deleted file mode 100644 index 53ac7e63b3..0000000000 --- a/crmsh/report/utillib.py +++ /dev/null @@ -1,1602 +0,0 @@ -# Copyright (C) 2017 Xin Liang -# See COPYING for license information. - -import bz2 -import lzma -import datetime -import glob -import gzip -import multiprocessing -import os -import random -import re -import shutil -import string -import subprocess -import sys -import atexit -import tempfile -import contextlib -from dateutil import tz -from threading import Timer -from inspect import getmembers, isfunction - -import crmsh.config -from crmsh import utils as crmutils, sh -from crmsh import corosync, log, userdir -from crmsh.report import constants, collect -from crmsh.sh import ShellUtils - - -logger = log.setup_report_logger(__name__) - - -class Tempfile(object): - - def __init__(self): - self.file = create_tempfile() - logger.debug("create tempfile \"%s\"", self.file) - - def add(self, filename): - with open(self.file, 'a') as f: - f.write(filename + '\n') - logger.debug("add tempfile \"%s\" to \"%s\"", filename, self.file) - - def drop(self): - with open(self.file, 'r') as f: - for line in f.read().split('\n'): - if os.path.isdir(line): - shutil.rmtree(line) - if os.path.isfile(line): - os.remove(line) - os.remove(self.file) - logger.debug("remove tempfile \"%s\"", self.file) - - -def add_tempfiles(filename): - t = Tempfile() - t.add(filename) - atexit.register(t.drop) - - -def _mkdir(directory): - """ - from crmsh/tmpfiles.py - """ - if not os.path.isdir(directory): - try: - os.makedirs(directory) - except OSError as err: - log_fatal("Failed to create directory: %s" % (err)) - - -def arch_logs(logf, from_time, to_time): - """ - go through archived logs (timewise backwards) and see if there - are lines belonging to us - (we rely on untouched log files, i.e. 
that modify time - hasn't been changed) - """ - ret = [] - files = [logf] - files += glob.glob(logf+"*[0-9z]") - # like ls -t, newest first - for f in sorted(files, key=os.path.getmtime, reverse=True): - # reset this var to check every file's format - constants.GET_STAMP_FUNC = None - res = is_our_log(f, from_time, to_time) - if res == 0: # noop, continue - continue - elif res == 1: # include log and continue - ret.append(f) - logger.debug("found log %s", f) - elif res == 2: # don't go through older logs! - break - elif res == 3: # include log and continue - ret.append(f) - logger.debug("found log %s", f) - break - return ret - - -def analyze(): - workdir = constants.WORKDIR - out_string = "" - tmp_string = "" - flist = [constants.MEMBERSHIP_F, constants.CRM_MON_F, - constants.B_CONF, constants.SYSINFO_F, constants.CIB_F] - for f in flist: - out_string += "Diff %s... " % f - if not glob.glob("%s/*/%s" % (workdir, f)): - out_string += "no %s/*/%s :/\n" % (workdir, f) - continue - code, tmp_string = analyze_one(workdir, f) - if tmp_string: - out_string += "\n" + tmp_string + "\n\n" - else: - out_string += "OK\n" - if code == 0: - if f != constants.CIB_F: - consolidate(workdir, f) - - out_string += "\n" - - out_string += check_crmvfy(workdir) - out_string += check_backtraces(workdir) - out_string += check_permissions(workdir) - out_string += check_logs(workdir) - - analyze_f = os.path.join(workdir, constants.ANALYSIS_F) - crmutils.str2file(out_string, analyze_f) - - -def analyze_one(workdir, file_): - out_string = "" - tmp_string = "" - tmp_rc = 0 - node0 = "" - rc = 0 - for n in constants.NODES.split(): - if node0: - tmp_rc, tmp_string = diff_check(os.path.join(workdir, node0, file_), os.path.join(workdir, n, file_)) - out_string += tmp_string - rc += tmp_rc - else: - node0 = n - return (rc, out_string) - - -def base_check(): - if not which("which"): - log_fatal("please install the which(1) program") - if not os.path.exists(constants.BIN_CRM): - if os.path.exists("/usr/bin/crm"): - constants.BIN_CRM = "/usr/bin/crm" - else: - log_fatal("Cannot find crm command!") - - -def booth_info(): - if not which("booth"): - return "" - return get_command_info("booth --version")[1] - - -def check_backtraces(workdir): - out_string = "" - pattern = "Core was generated|Program terminated" - for n in constants.NODES.split(): - bt_f = os.path.join(workdir, n, constants.BT_F) - if os.path.isfile(bt_f) and os.stat(bt_f).st_size != 0: - out_string += "WARN: coredumps found at %s:\n" % n - for line in grep(pattern, infile=bt_f): - out_string += " %s\n" % line - return out_string - - -def check_crmvfy(workdir): - """ - some basic analysis of the report - """ - out_string = "" - for n in constants.NODES.split(): - crm_verify_f = os.path.join(workdir, n, constants.CRM_VERIFY_F) - if os.path.isfile(crm_verify_f) and os.stat(crm_verify_f).st_size != 0: - out_string += "WARN: crm_verify reported warnings at %s:\n" % n - out_string += open(crm_verify_f).read() - return out_string - - -def check_env(): - set_env() - base_check() - get_ocf_dir() - load_ocf_dirs() - - -def check_if_log_is_empty(): - for f in find_files_all(constants.HALOG_F, constants.WORKDIR): - if os.stat(f).st_size == 0: - logger.warning("Report contains no logs; did you get the right timeframe?") - - -def check_logs(workdir): - out_string = "" - log_list = [] - for l in constants.EXTRA_LOGS.split(): - log_list += find_files_all(os.path.basename(l), workdir) - if not log_list: - return out_string - - out_string += "\nLog patterns:\n" - log_patterns = 
constants.LOG_PATTERNS.replace(' ', '|') - for n in constants.NODES.split(): - for f in log_list: - out_string += '\n'.join(grep(log_patterns, infile=f)) - return out_string - - -def check_permissions(workdir): - out_string = "" - for n in constants.NODES.split(): - permissions_f = os.path.join(workdir, n, constants.PERMISSIONS_F) - if os.path.isfile(permissions_f) and os.stat(permissions_f).st_size != 0: - out_string += "Checking problems with permissions/ownership at %s:\n" % n - out_string += open(permissions_f).read() - return out_string - - -def check_time(var, option): - if not var: - log_fatal("""bad time specification: %s - try these like: 2pm - 1:00 - "2007/9/5 12:30" - "09-Sep-07 2:00" - """ % option) - - -def cib_diff(file1, file2): - """ - check if files have same content in the cluster - """ - code = 0 - out_string = "" - tmp_string = "" - d1 = os.path.dirname(file1) - d2 = os.path.dirname(file2) - if (os.path.isfile(os.path.join(d1, "RUNNING")) and - os.path.isfile(os.path.join(d2, "RUNNING"))) or \ - (os.path.isfile(os.path.join(d1, "STOPPED")) and - os.path.isfile(os.path.join(d2, "STOPPED"))): - if which("crm_diff"): - code, tmp_string = get_command_info("crm_diff -c -n %s -o %s" % (file1, file2)) - out_string += tmp_string - else: - code = 1 - logger.warning("crm_diff(8) not found, cannot diff CIBs") - else: - code = 1 - out_string += "can't compare cibs from running and stopped systems\n" - return code, out_string - - -def cluster_info(): - return get_command_info("corosync -v")[1] - - -def generate_collect_functions(): - """ - Generate function list from collect.py - """ - return [func for func, _ in getmembers(collect, isfunction) if func.startswith("collect_")] - - -def collect_info(): - """ - get all other info (config, stats, etc) - """ - collect_func_list = generate_collect_functions() - # Make sure not to occupy all CPUs - pool = multiprocessing.Pool(round(0.8 * multiprocessing.cpu_count())) - # result here to store AsyncResult object returned from apply_async - # Then calling get() method will catch exceptions like NameError, AttributeError, etc. 
- # Otherwise parent process will not know these exceptions raised - # Calling get() right after apply_async will be blocked until child process finished, so - # need to append to a list firstly - result_list = [] - for cf in collect_func_list: - result = pool.apply_async(getattr(collect, cf)) - result_list.append(result) - pool.close() - pool.join() - - for result in result_list: - try: - result.get() - except Exception as err: - logger.error(str(err)) - - logfile_list = [] - corosync_log = corosync.get_value('logging.logfile') - if corosync_log: - logfile_list.append(corosync_log) - logfile_list += constants.EXTRA_LOGS.split() - - for l in logfile_list: - if not os.path.isfile(l): - continue - if l == constants.HA_LOG and l != constants.HALOG_F: - os.symlink(constants.HALOG_F, os.path.join(constants.WORKDIR, os.path.basename(l))) - continue - if is_our_log(l, constants.FROM_TIME, constants.TO_TIME) == 4: - logger.debug("found irregular log file %s", l) - outf = os.path.join(constants.WORKDIR, os.path.basename(l)) - shutil.copy2(l, constants.WORKDIR) - log_size(l, outf+'.info') - continue - getstampproc = find_getstampproc(l) - if getstampproc: - constants.GET_STAMP_FUNC = getstampproc - outf = os.path.join(constants.WORKDIR, os.path.basename(l)) - if dump_logset(l, constants.FROM_TIME, constants.TO_TIME, outf): - log_size(l, outf+'.info') - else: - logger.debug("could not figure out the log format of %s", l) - - -def collect_journal(from_t, to_t, outf): - if not which("journalctl"): - logger.warning("Command journalctl not found") - return - - if crmutils.is_int(from_t) and from_t == 0: - from_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M") - elif crmutils.is_int(from_t): - from_time = ts_to_dt(from_t).strftime("%Y-%m-%d %H:%M") - if crmutils.is_int(to_t) and to_t == 0: - to_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M") - elif crmutils.is_int(to_t): - to_time = ts_to_dt(to_t).strftime("%Y-%m-%d %H:%M") - if os.path.isfile(outf): - logger.warning("%s already exists", outf) - - logger.debug("journalctl from: '%d' until: '%d' from_time: '%s' to_time: '%s' > %s", - from_t, to_t, from_time, to_time, outf) - cmd = 'journalctl -o short-iso-precise --since "%s" --until "%s" --no-pager | tail -n +2' % \ - (from_time, to_time) - crmutils.str2file(get_command_info(cmd)[1], outf) - - -def compatibility_pcmk(): - get_crm_daemon_dir() - if not constants.CRM_DAEMON_DIR: - log_fatal("cannot find pacemaker daemon directory!") - get_pe_state_dir() - if not constants.PE_STATE_DIR: - log_fatal("cannot find pe daemon directory!") - get_cib_dir() - if not constants.CIB_DIR: - log_fatal("cannot find cib daemon directory!") - - constants.PCMK_LIB = os.path.dirname(constants.CIB_DIR) - logger.debug("setting PCMK_LIB to %s", constants.PCMK_LIB) - constants.CORES_DIRS = os.path.join(constants.PCMK_LIB, "cores") - constants.CONF = "/etc/corosync/corosync.conf" - if os.path.isfile(constants.CONF): - constants.CORES_DIRS += " /var/lib/corosync" - constants.B_CONF = os.path.basename(constants.CONF) - - -def consolidate(workdir, f): - """ - remove duplicates if files are same, make links instead - """ - for n in constants.NODES.split(): - if os.path.isfile(os.path.join(workdir, f)): - os.remove(os.path.join(workdir, n, f)) - else: - shutil.move(os.path.join(workdir, n, f), workdir) - os.symlink("../%s" % f, os.path.join(workdir, n, f)) - - -def create_tempfile(time=None): - random_str = random_string(4) - try: - filename = tempfile.mkstemp(suffix=random_str, prefix="tmp.")[1] - except: - 
log_fatal("Can't create file %s" % filename) - if time: - os.utime(filename, (time, time)) - return filename - - -def date(): - return datetime.datetime.now().strftime("%a %b %-d %H:%M:%S CST %Y") - - -def diff_check(file1, file2): - out_string = "" - for f in [file1, file2]: - if not os.path.isfile(f): - out_string += "%s does not exist\n" % f - return (1, out_string) - if os.path.basename(file1) == constants.CIB_F: - return cib_diff(file1, file2) - else: - return (0, txt_diff(file1, file2)) - - -def get_distro_info(): - """ - get distribution information - """ - res = None - if os.path.exists(constants.OSRELEASE): - logger.debug("Using {} to get distribution info".format(constants.OSRELEASE)) - res = re.search("PRETTY_NAME=\"(.*)\"", read_from_file(constants.OSRELEASE)) - elif which("lsb_release"): - logger.debug("Using lsb_release to get distribution info") - out = sh.LocalShell().get_stdout_or_raise_error("lsb_release -d") - res = re.search("Description:\s+(.*)", out) - return res.group(1) if res else "Unknown" - - -def dump_log(logf, from_line, to_line): - if not from_line: - return - return filter_lines(logf, from_line, to_line) - - -def dump_logset(logf, from_time, to_time, outf): - """ - find log/set of logs which are interesting for us - """ - if os.stat(logf).st_size == 0: - return False - logf_set = arch_logs(logf, from_time, to_time) - if not logf_set: - return False - num_logs = len(logf_set) - oldest = logf_set[-1] - newest = logf_set[0] - mid_logfiles = logf_set[1:-1] - out_string = "" - - # the first logfile: from $from_time to $to_time (or end) - # logfiles in the middle: all - # the last logfile: from beginning to $to_time (or end) - if num_logs == 1: - out_string += print_logseg(newest, from_time, to_time) - else: - out_string += print_logseg(oldest, from_time, 0) - for f in mid_logfiles: - out_string += print_logseg(f, 0, 0) - logger.debug("including complete %s logfile", f) - out_string += print_logseg(newest, 0, to_time) - - crmutils.str2file(out_string, outf) - return True - - -def dump_state(workdir): - res = grep("^Last upd", incmd="crm_mon -1", flag="v") - crmutils.str2file('\n'.join(res), os.path.join(workdir, constants.CRM_MON_F)) - cmd = "cibadmin -Ql" - crmutils.str2file(get_command_info(cmd)[1], os.path.join(workdir, constants.CIB_F)) - cmd = "crm_node -p" - crmutils.str2file(get_command_info(cmd)[1], os.path.join(workdir, constants.MEMBERSHIP_F)) - - -def events(destdir): - events_f = os.path.join(destdir, "events.txt") - out_string = "" - pattern = '|'.join(constants.EVENT_PATTERNS.split()[1::2]) - halog_f = os.path.join(destdir, constants.HALOG_F) - if os.path.isfile(halog_f): - out_string = '\n'.join(grep(pattern, infile=halog_f)) - crmutils.str2file(out_string, events_f) - for n in constants.NODES.split(): - if os.path.isdir(os.path.join(destdir, n)): - events_node_f = os.path.join(destdir, n, "events.txt") - out_string = '\n'.join(grep(" %s " % n, infile=events_f)) - crmutils.str2file(out_string, events_node_f) - else: - for n in constants.NODES.split(): - halog_f = os.path.join(destdir, n, constants.HALOG_F) - if not os.path.isfile(halog_f): - continue - out_string = '\n'.join(grep(pattern, infile=halog_f)) - crmutils.str2file(out_string, os.path.join(destdir, n, "events.text")) - - -def find_decompressor(log_file): - decompressor = "cat" - if re.search("bz2$", log_file): - decompressor = "bzip2 -dc" - elif re.search("gz$", log_file): - decompressor = "gzip -dc" - elif re.search("xz$", log_file): - decompressor = "xz -dc" - return decompressor - - -def 
find_files(dirs, from_time, to_time): - res = [] - - if (not crmutils.is_int(from_time)) or (from_time <= 0): - logger.warning("sorry, can't find files based on time if you don't supply time") - return - - file_with_stamp = create_tempfile(from_time) - findexp = "-newer %s" % file_with_stamp - - if crmutils.is_int(to_time) and to_time > 0: - file_with_stamp = create_tempfile(to_time) - findexp += " ! -newer %s" % file_with_stamp - - cmd = r"find %s -type f %s" % (dirs, findexp) - cmd_res = get_command_info(cmd)[1].strip() - if cmd_res: - res = cmd_res.split('\n') - - os.remove(file_with_stamp) - return res - - -def find_files_all(name, path): - result = [] - for root, dirs, files in os.walk(path): - if name in files: - result.append(os.path.join(root, name)) - return result - - -def find_first_ts(data): - ts = None - for line in data: - ts = get_ts(line) - if ts: - break - return ts - - -def filter_lines(data, from_line, to_line): - out_string = "" - count = 1 - for line in data.split('\n'): - if count >= from_line and count <= to_line: - out_string += line + '\n' - if count > to_line: - break - count += 1 - return out_string - - -def finalword(): - if constants.COMPRESS == 1: - logger.info("The report is saved in %s/%s.tar%s", constants.DESTDIR, constants.DEST, constants.COMPRESS_EXT) - else: - logger.info("The report is saved in %s/%s", constants.DESTDIR, constants.DEST) - if constants.TO_TIME == 0: - to_time = datetime.datetime.now().strftime("%x %X") - else: - to_time = ts_to_dt(constants.TO_TIME).strftime("%x %X") - logger.info("Report timespan: %s - %s", ts_to_dt(constants.FROM_TIME).strftime("%x %X"), to_time) - logger.info("Thank you for taking time to create this report.") - - -def find_getstampproc(log_file): - func = None - loop_cout = 10 - with open(log_file, 'r', encoding='utf-8', errors='replace') as f: - for line in f.readlines(): - if loop_cout == 0: - break - else: - loop_cout -= 1 - with stdchannel_redirected(sys.stderr, os.devnull): - func = find_getstampproc_raw(line.strip('\n')) - if func: - break - return func - - -def find_getstampproc_raw(line): - func = None - res = get_stamp_syslog(line) - if res: - func = "syslog" - return func - res = get_stamp_rfc5424(line) - if res: - func = "rfc5424" - return func - res = get_stamp_legacy(line) - if res: - func = "legacy" - logger.debug("the log file is in the legacy format (please consider switching to syslog format)") - return func - return func - - -def find_log(): - """ - first try syslog files, if none found then use the - logfile/debugfile settings - """ - if constants.EXTRA_LOGS: - for l in constants.EXTRA_LOGS.split(): - if os.path.isfile(l): - return l - - tmp_f = os.path.join(constants.WORKDIR, constants.JOURNAL_F) - if os.path.isfile(tmp_f): - return tmp_f - - for l in constants.PCMK_LOG.split(): - if os.path.isfile(l): - return l - - if constants.HA_DEBUGFILE: - logger.debug("will try with %s", constants.HA_DEBUGFILE) - return constants.HA_DEBUGFILE - - -def find_ssh_user(): - ssh_user = "" - ssh_user_try_list = [] - if constants.SSH_USER: - ssh_user_try_list.append(constants.SSH_USER) - sudoer = userdir.get_sudoer() - if sudoer: - ssh_user_try_list.append(sudoer) - current_user = userdir.getuser() - ssh_user_try_list.append(current_user) - - for n in constants.NODES.split(): - if n == constants.WE: - continue - rc = False - for u in ssh_user_try_list: - ssh_s = f"{u}@{n}" - if not crmutils.check_ssh_passwd_need(u, u, n): - logger.debug("ssh %s OK", ssh_s) - ssh_user = u - rc = True - break - else: - 
logger.debug("ssh %s failed", ssh_s) - if not rc: - constants.SSH_PASSWORD_NODES.append(n) - - if constants.SSH_PASSWORD_NODES: - logger.warning("passwordless ssh to node(s) %s does not work", constants.SSH_PASSWORD_NODES) - if ssh_user: - constants.SUDO = "" if ssh_user in ["root", "hacluster"] else "sudo" - constants.SSH_USER = ssh_user if ssh_user else "" - - -def findln_by_time(data, ts): - ''' - Get line number of the specific time stamp - ''' - data_list = data.split('\n') - - first= 1 - last= len(data_list) - time_middle = None - - while first <= last: - middle = (last + first) // 2 - trycnt = 10 - while trycnt > 0: - res = line_time(data_list, middle) - if res: - time_middle = res - break - trycnt -= 1 - # shift the whole first-last segment - prevmid = middle - while prevmid == middle: - first -= 1 - if first < 1: - first = 1 - last -= 1 - if last < first: - last = first - prevmid = middle - middle = (last + first) // 2 - if first == last: - break - if not time_middle: - return None - if time_middle > ts: - last = middle - 1 - elif time_middle < ts: - first = middle + 1 - else: - break - return middle - - -def find_binary_for_core(corefile): - """ - Given a core file, try to find the - binary that generated it - Returns a path or None - """ - def findbin(fname): - def isexec(filename): - return os.path.isfile(filename) and os.access(filename, os.X_OK) - bindirs = [constants.HA_BIN, constants.CRM_DAEMON_DIR] - if which(fname): - return fname - else: - for d in bindirs: - if d is None: - continue - testpath = os.path.join(d, fname) - if isexec(testpath): - return testpath - return None - if which("cat"): - random_binary = "cat" - lines = [l for l in get_command_info_timeout(["gdb", random_binary, corefile]).splitlines() if "Core was generated by" in l] - binname = None - if len(lines) > 0: - m = re.search(r"generated by .([^']+)", ) - if m: - fname = m.group(1) - binname = findbin(fname) - if binname is not None: - logger.debug("found the program at %s for core %s", testpath, corefile) - else: - logger.warning("Could not find the program path for core %s", corefile) - return binname - - -def print_core_backtraces(flist): - """ - Use gdb to get backtrace from core files. 
- flist: names of core files to check - """ - if not which("gdb"): - logger.warning("Please install gdb to get backtraces") - return - for corefile in flist: - absbinpath = find_binary_for_core(corefile) - if absbinpath is None: - continue - get_debuginfo(absbinpath, corefile) - bt_opts = os.environ.get("BT_OPTS", "thread apply all bt full") - print("====================== start backtrace ======================") - print(get_command_info_timeout(["ls", "-l", corefile])) - print(get_command_info_timeout(["gdb", "-batch", "-n", "-quiet", - "-ex", bt_opts, "-ex", "quit", - absbinpath, corefile])) - print("======================= end backtrace =======================") - - -def get_cib_dir(): - try: - constants.CIB_DIR = crmsh.config.path.crm_config - except: - return - if not os.path.isdir(constants.CIB_DIR): - constants.CIB_DIR = None - - -def get_command_info(cmd): - code, out, err = ShellUtils().get_stdout_stderr(cmd) - if out: - return (code, out + '\n') - else: - return (code, "") - - -def get_command_info_timeout(cmd, timeout=5): - # Python 101: How to timeout a subprocess - def kill(process): - process.kill() - proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - my_timer = Timer(timeout, kill, [proc]) - try: - my_timer.start() - stdout, stderr = proc.communicate() - finally: - my_timer.cancel() - - if stdout and proc.returncode == 0: - return crmutils.to_ascii(stdout) - else: - return "" - - -def get_conf_var(option, default=None): - ret = default - if os.path.isfile(constants.CONF): - data = read_from_file(constants.CONF) - for line in data.split('\n'): - if re.match("^\s*%s\s*:" % option, line): - ret = line.split(':')[1].lstrip() - return ret - - -def get_crm_daemon_dir(): - try: - constants.CRM_DAEMON_DIR = crmsh.config.path.crm_daemon_dir - except: - return - if not os.path.isdir(constants.CRM_DAEMON_DIR) or \ - not any(is_exec(os.path.join(constants.CRM_DAEMON_DIR, cmd)) for cmd in ["crmd", "pacemaker-controld"]): - constants.CRM_DAEMON_DIR = None - - -def get_dirname(path): - tmp = os.path.dirname(path) - if not tmp: - tmp = "." - return tmp - - -def get_local_ip(): - local_ip = [] - ip_pattern = "[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}" - for line in grep(ip_pattern, incmd="corosync-cfgtool -s"): - local_ip.append(line.split()[2]) - return local_ip - - -def get_log_vars(): - if is_conf_set("debug"): - constants.HA_LOGLEVEL = "debug" - if is_conf_set("to_logfile"): - constants.HA_LOGFILE = get_conf_var("logfile", default="syslog") - constants.HA_DEBUGFILE = constants.HA_LOGFILE - elif is_conf_set("to_syslog"): - constants.HA_LOGFACILITY = get_conf_var("syslog_facility", default="daemon") - - logger.debug("log settings: facility=%s logfile=%s debugfile=%s", - constants.HA_LOGFACILITY, constants.HA_LOGFILE, constants.HA_DEBUGFILE) - - -def get_nodes(): - """ - find nodes for this cluster - """ - nodes = [] - # 1. set by user? - if constants.USER_NODES: - nodes = constants.USER_NODES.split() - # 2. running crm - elif crmutils.is_process("pacemaker-controld") or crmutils.is_process("crmd"): - cmd = "crm node server" - nodes = get_command_info(cmd)[1].strip().split('\n') - # 3. 
if the cluster's stopped, try the CIB - else: - cmd = r"(CIB_file=%s/%s crm node server)" % (constants.CIB_DIR, constants.CIB_F) - nodes = get_command_info(cmd)[1].strip().split('\n') - - return nodes - - -def get_peer_ip(): - local_ip = get_local_ip() - peer_ip = [] - ip_pattern = "[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}" - for line in grep("runtime.*.srp.*.ip", incmd="corosync-cmapctl"): - for ip in re.findall(ip_pattern, line): - if ip not in local_ip: - peer_ip.append(ip) - return peer_ip - - -def get_ocf_dir(): - ocf_dir = None - try: - ocf_dir = crmsh.config.path.ocf_root - except: - log_fatal("Can not find OCF_ROOT_DIR!") - if not os.path.isdir(ocf_dir): - log_fatal("Directory %s is not OCF_ROOT_DIR!" % ocf_dir) - constants.OCF_DIR = ocf_dir - - -def get_pe_state_dir(): - try: - constants.PE_STATE_DIR = crmsh.config.path.pe_state_dir - except: - return - if not os.path.isdir(constants.PE_STATE_DIR): - constants.PE_STATE_DIR = None - - -def get_pkg_mgr(): - pkg_mgr = None - - if which("dpkg"): - pkg_mgr = "deb" - elif which("rpm"): - pkg_mgr = "rpm" - elif which("pkg_info"): - pkg_mgr = "pkg_info" - elif which("pkginfo"): - pkg_mgr = "pkginfo" - else: - logger.warning("Unknown package manager!") - - return pkg_mgr - - -def get_stamp_legacy(line): - try: - res = crmutils.parse_time(line.split()[1], quiet=True) - except: - return None - return res - - -def get_stamp_rfc5424(line): - try: - res = crmutils.parse_time(line.split()[0], quiet=True) - except: - return None - return res - - -def get_stamp_syslog(line): - try: - res = crmutils.parse_time(' '.join(line.split()[0:3]), quiet=True) - except: - return None - return res - - -def get_ts(line): - ts = None - with stdchannel_redirected(sys.stderr, os.devnull): - if not constants.GET_STAMP_FUNC: - func = find_getstampproc_raw(line) - else: - func = constants.GET_STAMP_FUNC - if func: - if func == "rfc5424": - ts = crmutils.parse_to_timestamp(line.split()[0], quiet=True) - if func == "syslog": - ts = crmutils.parse_to_timestamp(' '.join(line.split()[0:3]), quiet=True) - if func == "legacy": - ts = crmutils.parse_to_timestamp(line.split()[1], quiet=True) - return ts - - -def grep(pattern, infile=None, incmd=None, flag=None): - res = [] - if not infile and not incmd: - return res - - if infile: - if os.path.isfile(infile): - res = grep_file(pattern, infile, flag) - elif os.path.isdir(infile): - for root, dirs, files in os.walk(infile): - for f in files: - res += grep_file(pattern, os.path.join(root, f), flag) - else: - for f in glob.glob(infile): - res += grep_file(pattern, f, flag) - elif incmd: - res += grep_row(pattern, get_command_info(incmd)[1], flag) - - if flag and "q" in flag: - return len(res) != 0 - return res - - -def grep_file(pattern, infile, flag): - res = [] - with open(infile, 'r', encoding='utf-8', errors="replace") as fd: - res = grep_row(pattern, fd.read(), flag) - if res: - if flag and "l" in flag: - return [infile] - return res - - -def grep_row(pattern, indata, flag): - INVERT = False - SHOWNUM = False - reflag = 0 - if flag: - if "v" in flag: - INVERT = True - if "i" in flag: - reflag |= re.I - if "w" in flag: - pattern = r"\b%s\b" % pattern - if "n" in flag: - SHOWNUM = True - - res = [] - count = 0 - for line in indata.split('\n')[:-1]: - count += 1 - if re.search(pattern, line, reflag): - if not INVERT: - if SHOWNUM: - res.append("%d:%s" % (count, line)) - else: - res.append(line) - elif INVERT: - if SHOWNUM: - res.append("%d:%s" % (count, line)) - else: - res.append(line) - return res - - -def head(n, 
indata): - return indata.split('\n')[:n] - - -def is_conf_set(option, subsys=None): - subsys_start = 0 - if os.path.isfile(constants.CONF): - data = read_from_file(constants.CONF) - for line in data.split('\n'): - if re.search("^\s*subsys\s*:\s*%s$" % subsys, line): - subsys_start = 1 - if subsys_start == 1 and re.search("^\s*}", line): - subsys_start = 0 - if re.match("^\s*%s\s*:\s*(on|yes)$" % option, line): - if not subsys or subsys_start == 1: - return True - return False - - -def is_exec(filename): - return os.path.isfile(filename) and os.access(filename, os.X_OK) - - -def is_our_log(logf, from_time, to_time): - """ - check if the log contains a piece of our segment - """ - data = read_from_file(logf) - if not data: - logger.debug("Found empty file \"%s\"; exclude", logf) - return 0 - first_time = find_first_ts(head(10, data)) - last_time = find_first_ts(tail(10, data)) - - if (not first_time) or (not last_time): - if os.stat(logf).st_size > 0: - return 4 # irregular log, not empty - return 0 # skip (empty log?) - if from_time > last_time: - # we shouldn't get here anyway if the logs are in order - return 2 # we're past good logs; exit - if from_time >= first_time: - return 3 # this is the last good log - if to_time == 0 or to_time >= first_time: - return 1 # include this log - else: - return 0 # don't include this log - - - -def line_time(data_list, line_num): - ''' - Get time stamp of the specific line - ''' - return get_ts(data_list[line_num-1]) - - -def load_ocf_dirs(): - inf = "%s/lib/heartbeat/ocf-directories" % constants.OCF_DIR - if not os.path.isfile(inf): - log_fatal("file %s not exist" % inf) - constants.HA_VARLIB = grep("HA_VARLIB:=", infile=inf)[0].split(':=')[1].strip('}') - constants.HA_BIN = grep("HA_BIN:=", infile=inf)[0].split(':=')[1].strip('}') - - -def log_fatal(msg): - logger.error(msg) - sys.exit(1) - - -def log_size(logf, outf): - l_size = os.stat(logf).st_size + 1 - out_string = "%s %d" % (logf, l_size) - crmutils.str2file(out_string, outf) - - -def make_temp_dir(): - dir_path = r"/tmp/.crm_report.workdir.%s" % random_string(6) - _mkdir(dir_path) - return dir_path - - -def mktemplate(argv): - """ - description template, editing, and other notes - """ - workdir = constants.WORKDIR - out_string = constants.EMAIL_TMPLATE.format("%s" % date(), ' '.join(argv[1:])) - sysinfo_f = os.path.join(workdir, constants.SYSINFO_F) - if os.path.isfile(sysinfo_f): - out_string += "Common saystem info found:\n" - with open(sysinfo_f, 'r') as f: - out_string += f.read() - else: - for n in constants.NODES.split(): - sysinfo_node_f = os.path.join(workdir, n, constants.SYSINFO_F) - if os.path.isfile(sysinfo_node_f): - out_string += "System info %s:\n" % n - out_string += sed_inplace(sysinfo_node_f, r'^', ' ') - out_string += "\n" - crmutils.str2file(out_string, os.path.join(workdir, constants.DESCRIPTION_F)) - - -def pe_to_dot(pe_file): - dotf = '.'.join(pe_file.split('.')[:-1]) + '.dot' - cmd = "%s -D %s -x %s" % (constants.PTEST, dotf, pe_file) - code, _ = ShellUtils().get_stdout(cmd) - if code != 0: - logger.warning("pe_to_dot: %s -> %s failed", pe_file, dotf) - - -def pick_compress(): - prog, ext = pick_first_compress() - if prog: - constants.COMPRESS_PROG, constants.COMPRESS_EXT = prog, ext - else: - logger.warning("the resulting tarball may be huge") - constants.COMPRESS_PROG = "cat" - - -def pick_first_compress(): - compress_prog_suffix_dict = { - "gzip": ".gz", - "bzip2": ".bz2", - "xz": ".xz" - } - for cmd, suffix in compress_prog_suffix_dict.items(): - if 
shutil.which(cmd): - return cmd, suffix - logger.warning("Could not find a compression program") - return None, None - - -def pkg_ver_deb(packages): - res = "" - for pack in packages.split(): - code, out = get_command_info("dpkg-query -s %s" % pack) - if code != 0: - continue - distro = "Unknown" - for line in out.split('\n'): - if re.match("^Package\s*:", line): - name = line.split(':')[1].lstrip() - elif re.match("^Version\s*:", line): - version = line.split(':')[1].lstrip() - elif re.match("^Architecture\s*:", line): - arch = line.split(':')[1].lstrip() - res += "%s %s - %s\n" % (name, version, arch) - return res - - -def pkg_ver_pkg_info(packages): - pass - - -def pkg_ver_pkginfo(packages): - pass - - -def pkg_ver_rpm(packages): - res = "" - for pack in packages.split(): - code, out = get_command_info("rpm -qi %s" % pack) - if code != 0: - continue - distro = "Unknown" - for line in out.split('\n'): - if re.match("^Name\s*:", line): - name = line.split(':')[1].lstrip() - elif re.match("^Version\s*:", line): - version = line.split(':')[1].lstrip() - elif re.match("^Release\s*:", line): - release = line.split(':')[1].lstrip() - elif re.match("^Distribution\s*:", line): - distro = line.split(':')[1].lstrip() - elif re.match("^Architecture\s*:", line): - arch = line.split(':')[1].lstrip() - res += "%s %s-%s - %s %s\n" % (name, version, release, distro, arch) - return res - - -def pkg_versions(packages): - pkg_mgr = get_pkg_mgr() - if not pkg_mgr: - return "" - logger.debug("the package manager is %s", pkg_mgr) - if pkg_mgr == "deb": - return pkg_ver_deb(packages) - if pkg_mgr == "rpm": - return pkg_ver_rpm(packages) - if pkg_mgr == "pkg_info": - return pkg_ver_pkg_info(packages) - if pkg_mgr == "pkginfo": - return pkg_ver_pkginfo(packages) - - -def print_log(logf): - """ - print part of the log - """ - cat = find_decompressor(logf) - cmd = "%s %s" % (cat, logf) - out = ShellUtils().get_stdout(cmd) - return out - - -def print_logseg(logf, from_time, to_time): - data = read_from_file(logf) - - if from_time == 0: - from_line = 1 - else: - from_line = findln_by_time(data, from_time) - if from_line is None: - return "" - - if to_time == 0: - to_line = len(data.split('\n')) - else: - to_line = findln_by_time(data, to_time) - if to_line is None: - return "" - - logger.debug("Including segment [%d-%d] from %s", from_line, to_line, logf) - return filter_lines(data, from_line, to_line) - - -def ra_build_info(): - out = "UNKnown" - inf = "%s/lib/heartbeat/ocf-shellfuncs" % constants.OCF_DIR - res = grep("Build version:", infile=inf) - if res and not re.search(r"\$Format:%H\$", res[0]): - out = res[0] - return "resource-agents: %s\n" % out - - -def random_string(num): - tmp = [] - if crmutils.is_int(num) and num > 0: - s = string.ascii_letters + string.digits - tmp = random.sample(s, num) - return ''.join(tmp) - - -def sanitize(): - """ - replace sensitive info with '****' - """ - logger.debug("Check or replace sensitive info from cib, pe and log files") - - get_sensitive_key_value_list() - - work_dir = constants.WORKDIR - file_list = [] - for (dirpath, dirnames, filenames) in os.walk(work_dir): - for _file in filenames: - file_list.append(os.path.join(dirpath, _file)) - - for f in [item for item in file_list if os.path.isfile(item)]: - rc = sanitize_one(f) - if rc == 1: - logger.warning("Some PE/CIB/log files contain possibly sensitive data") - logger.warning("Using \"-s\" option can replace sensitive data") - break - - -def sanitize_one(in_file): - """ - Open the file, replace sensitive string and 
write back - """ - data = read_from_file(in_file) - if not data: - return - if not include_sensitive_data(data): - return - if not constants.DO_SANITIZE: - return 1 - logger.debug("Replace sensitive info for %s", in_file) - write_to_file(in_file, sub_sensitive_string(data)) - - -def parse_sanitize_rule(rule_string): - for rule in rule_string.split(): - if ':' in rule: - key, value = rule.split(':') - if value != "raw": - log_fatal("For sanitize_pattern {}, option should be \"raw\"".format(key)) - constants.SANITIZE_RULE_DICT[key] = value - else: - constants.SANITIZE_RULE_DICT[rule] = None - - -def say_ssh_user(): - if not constants.SSH_USER: - return "you user" - else: - return constants.SSH_USER - - -def sed_inplace(filename, pattern, repl): - out_string = "" - - pattern_compiled = re.compile(pattern) - with open(filename, 'r') as fd: - for line in fd: - out_string += pattern_compiled.sub(repl, line) - - return out_string - - -def set_env(): - os.environ["LC_ALL"] = "POSIX" - - -@contextlib.contextmanager -def stdchannel_redirected(stdchannel, dest_filename): - """ - A context manager to temporarily redirect stdout or stderr - e.g.: - with stdchannel_redirected(sys.stderr, os.devnull): - if compiler.has_function('clock_gettime', libraries=['rt']): - libraries.append('rt') - """ - - try: - oldstdchannel = os.dup(stdchannel.fileno()) - dest_file = open(dest_filename, 'w') - os.dup2(dest_file.fileno(), stdchannel.fileno()) - yield - - finally: - if oldstdchannel is not None: - os.dup2(oldstdchannel, stdchannel.fileno()) - if dest_file is not None: - dest_file.close() - - -def start_slave_collector(node, arg_str): - cmd = "{} report __slave".format(constants.BIN_CRM) - if node == constants.WE: - for item in arg_str.split(): - cmd += " {}".format(str(item)) - _, out = ShellUtils().get_stdout(cmd) - else: - node = f"{constants.SSH_USER}@{node}" if constants.SSH_USER else node - cmd = r'ssh {} {} "{} {}"'.format(constants.SSH_OPTS, node, constants.SUDO, cmd) - for item in arg_str.split(): - cmd += " {}".format(str(item)) - code, out, err = sh.LocalShell().get_rc_stdout_stderr(constants.SSH_USER, cmd) - if code != 0: - logger.warning(err) - for ip in get_peer_ip(): - logger.info("Trying connect by %s", ip) - cmd = cmd.replace(node, ip, 1) - code, out, err = ShellUtils().get_stdout_stderr(cmd) - if code != 0: - logger.warning(err) - break - if err: - print(err, file=sys.stderr) - - if out == '': # if we couldn't get anything - return - - compress_data = "" - for data in out.split('\n'): - if data.startswith(constants.COMPRESS_DATA_FLAG): - # crm report data from collector - compress_data = data.lstrip(constants.COMPRESS_DATA_FLAG) - else: - # log data from collector - print(data) - - cmd = r"(cd {} && tar xf -)".format(constants.WORKDIR) - ShellUtils().get_stdout(cmd, input_s=eval(compress_data)) - - -def str_to_bool(v): - return v.lower() in ["true"] - - -def tail(n, indata): - return indata.split('\n')[-n:] - - -def dump_D_process(): - ''' - dump D-state process stack - ''' - out_string = "" - _, out, _ = ShellUtils().get_stdout_stderr("ps aux|awk '$8 ~ /^D/{print $2}'") - len_D_process = len(out.split('\n')) if out else 0 - out_string += "Dump D-state process stack: {}\n".format(len_D_process) - if len_D_process == 0: - return out_string - for pid in out.split('\n'): - _, cmd_out, _ = ShellUtils().get_stdout_stderr("cat /proc/{}/comm".format(pid)) - out_string += "pid: {} comm: {}\n".format(pid, cmd_out) - _, stack_out, _ = ShellUtils().get_stdout_stderr("cat /proc/{}/stack".format(pid)) - 
out_string += stack_out + "\n\n" - return out_string - - -def lsof_ocfs2_device(): - """ - List open files for OCFS2 device - """ - out_string = "" - _, out, _ = ShellUtils().get_stdout_stderr("mount") - dev_list = re.findall("\n(.*) on .* type ocfs2 ", out) - for dev in dev_list: - cmd = "lsof {}".format(dev) - out_string += "\n\n#=====[ Command ] ==========================#\n" - out_string += "# {}\n".format(cmd) - _, cmd_out, _ = ShellUtils().get_stdout_stderr(cmd) - if cmd_out: - out_string += cmd_out - return out_string - - -def touch_r(src, dst): - """ - like shell command "touch -r src dst" - """ - if not os.path.exists(src): - logger.warning("In touch_r function, %s not exists", src) - return - stat_info = os.stat(src) - os.utime(dst, (stat_info.st_atime, stat_info.st_mtime)) - - -def ts_to_dt(timestamp): - """ - timestamp convert to datetime; consider local timezone - """ - dt = crmutils.timestamp_to_datetime(timestamp) - dt += tz.tzlocal().utcoffset(dt) - return dt - - -def txt_diff(file1, file2): - return get_command_info("diff -bBu %s %s" % (file1, file2))[1] - - -def verify_deb(packages): - res = "" - for pack in packages.split(): - cmd = r"dpkg --verify %s | grep -v 'not installed'" % pack - code, out = ShellUtils().get_stdout(cmd) - if code != 0 and out: - res = "For package %s:\n" % pack - res += out + "\n" - return res - - -def verify_packages(packages): - pkg_mgr = get_pkg_mgr() - if not pkg_mgr: - return "" - if pkg_mgr == "deb": - return verify_deb(packages) - if pkg_mgr == "rpm": - return verify_rpm(packages) - if pkg_mgr == "pkg_info": - return verify_pkg_info(packages) - if pkg_mgr == "pkginfo": - return verify_pkginfo(packages) - - -def verify_pkg_info(packages): - pass - - -def verify_pkginfo(packages): - pass - - -def verify_rpm(packages): - res = "" - for pack in packages.split(): - cmd = r"rpm --verify %s|grep -v 'not installed'" % pack - code, out = ShellUtils().get_stdout(cmd) - if code != 0 and out: - res = "For package %s:\n" % pack - res += out + "\n" - return res - - -def which(prog): - code, _ = get_command_info("which %s" % prog) - if code == 0: - return True - else: - return False - - -def get_open_method(infile): - file_type_open_dict = { - "gz": gzip.open, - "bz2": bz2.open, - "xz": lzma.open - } - try: - _open = file_type_open_dict[infile.split('.')[-1]] - except KeyError: - _open = open - return _open - - -def read_from_file(infile): - data = None - _open = get_open_method(infile) - with _open(infile, 'rt', encoding='utf-8', errors='replace') as f: - try: - data = f.read() - except Exception as err: - logger.error("When reading file \"%s\": %s", infile, str(err)) - return None - return crmutils.to_ascii(data) - - -def write_to_file(tofile, data): - _open = get_open_method(tofile) - with _open(tofile, 'w') as f: - if _open == open: - f.write(data) - else: - f.write(data.encode('utf-8')) - - -def get_sensitive_key_value_list(): - """ - For each defined sanitize rule, get the sensitive value or key list - """ - for key, value in constants.SANITIZE_RULE_DICT.items(): - try: - if value == "raw": - constants.SANITIZE_VALUE_RAW += extract_sensitive_value_list(key) - else: - constants.SANITIZE_VALUE_CIB += extract_sensitive_value_list(key) - constants.SANITIZE_KEY_CIB.append(key.strip('.*?')+'.*?') - except (FileNotFoundError, EOFError) as e: - logger.warning(e) - - -def extract_sensitive_value_list(rule): - """ - Extract sensitive value from cib.xml - """ - cib_file = os.path.join(constants.WORKDIR, constants.WE, constants.CIB_F) - if not 
os.path.exists(cib_file):
-        raise FileNotFoundError("File {} was not collected".format(constants.CIB_F))
-
-    with open(cib_file) as fd:
-        data = fd.read()
-    if not data:
-        raise EOFError("File {} is empty".format(cib_file))
-
-    value_list = re.findall(r'name="({})" value="(.*?)"'.format(rule.strip('?')+'?'), data)
-    return [value[1] for value in value_list]
-
-
-def include_sensitive_data(data):
-    """
-    Check whether contain sensitive data
-    """
-    if constants.SANITIZE_VALUE_RAW or constants.SANITIZE_VALUE_CIB:
-        return True
-    return False
-
-
-def sub_sensitive_string(data):
-    """
-    Do the replace job
-
-    For the raw sanitize_pattern option, replace exactly the value
-    For the key:value nvpair sanitize_pattern, replace the value in which line contain the key
-    """
-    result = data
-    if constants.SANITIZE_VALUE_RAW:
-        result = re.sub(r'\b({})\b'.format('|'.join(constants.SANITIZE_VALUE_RAW)), "******", data)
-    if constants.SANITIZE_VALUE_CIB:
-        result = re.sub('({})({})'.format('|'.join(constants.SANITIZE_KEY_CIB), '|'.join(constants.SANITIZE_VALUE_CIB)), '\\1******', result)
-    return result
-# vim:ts=4:sw=4:et:
diff --git a/crmsh/report/utils.py b/crmsh/report/utils.py
new file mode 100644
index 0000000000..abf3813bfa
--- /dev/null
+++ b/crmsh/report/utils.py
@@ -0,0 +1,762 @@
+# Copyright (C) 2017 Xin Liang
+# See COPYING for license information.
+
+import bz2
+import lzma
+import datetime
+import glob
+import gzip
+import os
+import re
+import shutil
+import sys
+import traceback
+from dateutil import tz
+from enum import Enum
+from typing import Optional, List, Tuple
+
+from crmsh import utils as crmutils
+from crmsh import corosync, log, userdir, tmpfiles, config, sh
+from crmsh.report import constants, collect, core
+from crmsh.sh import ShellUtils
+
+
+logger = log.setup_report_logger(__name__)
+
+
+class LogType(Enum):
+    GOOD = 0             # good log; include
+    IRREGULAR = 1        # irregular log; include
+    EMPTY = 2            # empty log; exclude
+    BEFORE_TIMESPAN = 3  # log before timespan; exclude
+    AFTER_TIMESPAN = 4   # log after timespan; exclude
+
+
+class ReportGenericError(Exception):
+    pass
+
+
+def arch_logs(context: core.Context, logf: str) -> Tuple[List[str], LogType]:
+    """
+    Go through archived logs and return those within the timespan, plus their LogType
+    """
+    return_list = []
+    log_type = None
+
+    file_list = [logf] + glob.glob(logf+"*[0-9z]")
+    # like ls -t, newest first
+    for f in sorted(file_list, key=os.path.getmtime, reverse=True):
+        tmp = is_our_log(context, f)
+        if tmp not in (LogType.GOOD, LogType.IRREGULAR):
+            continue
+        log_type = tmp
+        return_list.append(f)
+
+    if return_list:
+        logger.debug2(f"Found logs {return_list} in {get_timespan_str(context)}")
+    return return_list, log_type
+
+
+def analyze(context: core.Context) -> None:
+    """
+    Analyze the collected files and write the results into the analysis file
+    """
+    result_list = []
+
+    result_list.append(compare_and_consolidate_files(context))
+    result_list += check_collected_files(context)
+    result_list += extract_critical_log(context)
+
+    analyze_f = os.path.join(context.work_dir, constants.ANALYSIS_F)
+    crmutils.str2file('\n'.join(result_list), analyze_f)
+
+
+def compare_and_consolidate_files(context: core.Context) -> str:
+    out_string: str = ""
+    workdir = context.work_dir
+    compare_file_list = [
+        constants.MEMBERSHIP_F,
+        constants.CRM_MON_F,
+        constants.COROSYNC_F,
+        constants.SYSINFO_F,
+        constants.CIB_F
+    ]
+
+    for f in compare_file_list:
+        out_string += f"Diff {f}... "
+        if not glob.glob(f"{workdir}/*/{f}"):
+            out_string += f"no {f} found in {workdir}\n"
+            continue
+        rc, out = do_compare(context, f)
+        out_string += f"\n{out}\n" if out else "OK\n"
+        if rc == 0 and f != constants.CIB_F:
+            consolidate(context, f)
+
+    return out_string + '\n'
+
+
+def do_compare(context: core.Context, file: str) -> Tuple[int, str]:
+    """
+    Compare file content between cluster nodes
+    """
+    rc, out_string = 0, ""
+    prev_file_path = None
+
+    for n in context.node_list:
+        current_file_path = os.path.join(context.work_dir, n, file)
+
+        if prev_file_path:
+            tmp_rc, out = diff_check(prev_file_path, current_file_path)
+            out_string += f"{out}\n" if out else ""
+            # accumulate the per-pair return codes; `rc += rc` would only double the last one
+            rc += tmp_rc
+        else:
+            prev_file_path = current_file_path
+
+    return rc, out_string
+
+
+def check_collected_files(context: core.Context) -> List[str]:
+    """
+    Check collected files for warnings and issues
+    """
+    results = []
+    file_description_dict = {
+        constants.COREDUMP_F: "WARN: Coredump found at",
+        constants.CRM_VERIFY_F: "WARN: crm_verify reported warnings at",
+        constants.PERMISSIONS_F: "Checking problems with permissions/ownership at"
+    }
+
+    for node in context.node_list:
+        for f, desc in file_description_dict.items():
+            f_in_work_dir = os.path.join(context.work_dir, node, f)
+            if os.path.isfile(f_in_work_dir) and not crmutils.file_is_empty(f_in_work_dir):
+                results.append(f"{desc} {node}:")
+                results.append(read_from_file(f_in_work_dir))
+
+    return results
+
+
+def extract_critical_log(context: core.Context) -> List[str]:
+    """
+    Extract warnings and errors from collected log files
+    """
+    result_list = []
+    log_pattern_list = [f".*{p}.*" for p in constants.LOG_PATTERNS.split()]
+    log_pattern_str = '|'.join(log_pattern_list)
+
+    for f in glob.glob(f"{context.work_dir}/*/*.log"):
+        _list = re.findall(log_pattern_str, read_from_file(f))
+        if _list:
+            result_list.append(f"\nWARNINGS or ERRORS in {'/'.join(f.split('/')[3:])}:")
+            result_list.extend(_list)
+
+    return result_list
+
+
+def cib_diff(file1: str, file2: str) -> Tuple[int, str]:
+    """
+    Check if the CIB files have the same content in the cluster
+    """
+    node1_dir = os.path.dirname(file1)
+    node2_dir = os.path.dirname(file2)
+
+    if (os.path.isfile(os.path.join(node1_dir, "RUNNING")) and
+            os.path.isfile(os.path.join(node2_dir, "RUNNING"))) or \
+        (os.path.isfile(os.path.join(node1_dir, "STOPPED")) and
+            os.path.isfile(os.path.join(node2_dir, "STOPPED"))):
+        cmd = f"crm_diff -c -n {file1} -o {file2}"
+        code, out_string, _ = ShellUtils().get_stdout_stderr(cmd)
+    else:
+        code, out_string = 1, "Can't compare CIBs from running and stopped systems\n"
+    return code, out_string
+
+
+def consolidate(context: core.Context, target_file: str) -> None:
+    """
+    Remove duplicates if files are same, make links instead
+    """
+    workdir = context.work_dir
+    for node in context.node_list:
+        target_file_in_path = os.path.join(workdir, node, target_file)
+        if os.path.isfile(os.path.join(workdir, target_file)):
+            os.remove(target_file_in_path)
+        else:
+            shutil.move(target_file_in_path, workdir)
+        os.symlink(f"../{target_file}", target_file_in_path)
+
+
+def diff_check(file1: str, file2: str) -> Tuple[int, str]:
+    """
+    Check the differences between two files
+    """
+    for f in [file1, file2]:
+        if not os.path.isfile(f):
+            return (1, f"{f} does not exist\n")
+
+    diff_func = cib_diff if os.path.basename(file1) == constants.CIB_F else txt_diff
+    return diff_func(file1, file2)
+
+
+
+
+def get_distro_info() -> str:
+    """
+    Get distribution information
+    """
+    res = None
+    if os.path.exists(constants.OSRELEASE):
+        logger.debug2(f"Using {constants.OSRELEASE} to get distribution info")
+        res = re.search(r'PRETTY_NAME="(.*)"', read_from_file(constants.OSRELEASE))
+    elif shutil.which("lsb_release"):
+        logger.debug2("Using lsb_release to get distribution info")
+        out = sh.LocalShell().get_stdout_or_raise_error("lsb_release -d")
+        res = re.search(r'Description:\s+(.*)', out)
+    return res.group(1) if res else "Unknown"
+
+
+def dump_logset(context: core.Context, logf: str) -> None:
+    """
+    Dump the log set into the specified output file
+    """
+    logf_set, logf_type = arch_logs(context, logf)
+    if not logf_set:
+        logger.debug2(f"{logf} is not in timespan {get_timespan_str(context)}")
+        return
+
+    out_string = ""
+
+    if logf_type == LogType.IRREGULAR:
+        for f in logf_set:
+            out_string += print_logseg(f, 0, 0)
+    else:
+        newest, oldest = logf_set[0], logf_set[-1]
+        middle_set = logf_set[1:-1]
+
+        if len(logf_set) == 1:
+            out_string += print_logseg(newest, context.from_time, context.to_time)
+        else:
+            out_string += print_logseg(oldest, context.from_time, 0)
+            for f in middle_set:
+                out_string += print_logseg(f, 0, 0)
+            out_string += print_logseg(newest, 0, context.to_time)
+
+    if out_string:
+        outf = os.path.join(context.work_dir, os.path.basename(logf))
+        crmutils.str2file(out_string.strip('\n'), outf)
+        logger.debug(f"Dump {logf} into {real_path(outf)}")
+
+
+def find_files_in_timespan(context: core.Context, target_dir_list: List[str]) -> List[str]:
+    """
+    Get a list of files in the target directories whose ctime
+    (inode change time on Linux, not strictly creation time) is in the timespan
+    """
+    file_list = []
+
+    for target_dir in target_dir_list:
+        if not os.path.isdir(target_dir):
+            continue
+
+        for root, dirs, files in os.walk(target_dir):
+            for file in files:
+                file_path = os.path.join(root, file)
+                file_stat = os.stat(file_path)
+                if context.from_time <= file_stat.st_ctime <= context.to_time:
+                    file_list.append(file_path)
+
+    return file_list
+
+
+def find_first_timestamp(data: List[str]) -> Optional[float]:
+    """
+    Find the first timestamp in the given list of log lines
+    """
+    for line in data:
+        timestamp = get_timestamp(line)
+        if timestamp:
+            return timestamp
+    return None
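+
+
+# Usage sketch for filter_lines() below, with illustrative values:
+#
+#   >>> filter_lines("a\nb\nc\nd", 2, 3)
+#   'b\nc\n'
+#
+# Line numbers are 1-based and both bounds are inclusive, which matches the
+# line numbers produced by findln_by_timestamp().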
+
+
+def filter_lines(data: str, from_line: int, to_line: int) -> str:
+    """
+    Filter lines from the given data based on the specified line range
+    """
+    lines = data.split('\n')
+    filtered_lines = [
+        line + '\n'
+        for count, line in enumerate(lines, start=1)
+        if from_line <= count <= to_line
+    ]
+    return ''.join(filtered_lines)
+
+
+def determin_log_format(data: str) -> Optional[str]:
+    """
+    Determine the log format based on the given log lines
+    """
+    for line in head(constants.CHECK_LOG_LINES, data):
+        _list = line.split()
+        if not _list:
+            continue
+        # syslog format:
+        # Feb 12 18:30:08 15sp1-1 kernel: e820: BIOS-provided physical RAM map:
+        if len(_list) >= 3 and crmutils.parse_time(' '.join(_list[0:3]), quiet=True):
+            return "syslog"
+        # rfc5424 format:
+        # 2003-10-11T22:14:15.003Z mymachine.example.com su
+        if crmutils.parse_time(_list[0], quiet=True):
+            return "rfc5424"
+        if len(_list) > 1 and crmutils.parse_time(_list[1], quiet=True):
+            return "legacy"
+    return None
+
+
+def findln_by_timestamp(data: str, given_timestamp: float) -> Optional[int]:
+    """
+    Get line number of the specific time stamp
+    """
+    data_list = data.split('\n')
+    first, last = 1, len(data_list)
+
+    while first <= last:
+        middle = (last + first) // 2
+        trycnt = 10
+        while trycnt > 0:
+            middle_timestamp = get_timestamp(data_list[middle - 1])
+            if middle_timestamp:
+                break
+            # shift the whole first-last segment
+            trycnt -= 1
+            prevmid = middle
+            while prevmid == middle:
+                first -= 1
+                if first < 1:
+                    first = 1
+                last -= 1
+                if last < first:
+                    last = first
+                prevmid = middle
+                middle = (last + first) // 2
+                if first == last:
+                    break
+
+        if not middle_timestamp:
+            return None
+        if middle_timestamp > given_timestamp:
+            last = middle - 1
+        elif middle_timestamp < given_timestamp:
+            first = middle + 1
+        else:
+            break
+
+    return middle
+
+
+def get_pkg_mgr() -> str:
+    """
+    Get the package manager available in the system
+    """
+    pkg_mgr_candidates = {
+        "rpm": "rpm",
+        "dpkg": "deb"
+    }
+    for pkg_mgr, pkg_mgr_name in pkg_mgr_candidates.items():
+        if shutil.which(pkg_mgr):
+            return pkg_mgr_name
+
+    logger.warning("Unknown package manager!")
+    return ""
+
+
+def get_timestamp(line: str) -> Optional[float]:
+    """
+    Get timestamp for the given line
+    """
+    if not line or not constants.STAMP_TYPE:
+        return None
+
+    stamp_type = constants.STAMP_TYPE
+    if stamp_type == "rfc5424":
+        time_line = line.split()[0]
+    elif stamp_type == "syslog":
+        time_line = ' '.join(line.split()[0:3])
+    elif stamp_type == "legacy":
+        time_line = line.split()[1]
+    else:
+        # unknown stamp type: return None rather than hit an UnboundLocalError
+        return None
+
+    return crmutils.parse_to_timestamp(time_line, quiet=True)
+
+
+def head(n: int, indata: str) -> List[str]:
+    return indata.split('\n')[:n]
+
+
+def is_our_log(context: core.Context, logf: str) -> LogType:
+    """
+    Check if the log contains a piece of our segment
+
+    Return log type LogType
+    """
+    data = read_from_file(logf)
+    if not data:
+        return LogType.EMPTY
+    stamp_type = determin_log_format(data)
+    if not stamp_type:
+        return LogType.IRREGULAR
+    constants.STAMP_TYPE = stamp_type
+
+    first_time = find_first_timestamp(head(constants.CHECK_LOG_LINES, data))
+    last_time = find_first_timestamp(tail(constants.CHECK_LOG_LINES, data))
+    from_time = context.from_time
+    to_time = context.to_time
+
+    if from_time > last_time:
+        return LogType.BEFORE_TIMESPAN
+    if from_time >= first_time or to_time >= first_time:
+        return LogType.GOOD
+    return LogType.AFTER_TIMESPAN
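+
+
+# The timespan classification in is_our_log() above, in brief. F and L are
+# the first and last timestamps found in the log, [from, to] the requested
+# span (the report aborts earlier if from > to):
+#
+#   from > L    ->  LogType.BEFORE_TIMESPAN  (log ends before the span)
+#   to >= F     ->  LogType.GOOD             (log overlaps the span)
+#   otherwise   ->  LogType.AFTER_TIMESPAN   (log starts after the span)
+#
+# The code tests "from >= F or to >= F", which reduces to "to >= F" given
+# from <= to.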
+
+
+def create_description_template(context: core.Context) -> None:
+    """
+    Create description template, editing, and other notes
+    """
+    out_string = constants.DECRIPTION_TMPLATE.format(now(), ' '.join(sys.argv[1:]))
+
+    for n in context.node_list:
+        sysinfo_node_f = os.path.join(context.work_dir, n, constants.SYSINFO_F)
+        if os.path.isfile(sysinfo_node_f):
+            out_string += f"[Info from node {n}]:\n"
+            out_string += read_from_file(sysinfo_node_f)
+            out_string += "\n\n\n\n"
+
+    description_f = os.path.join(context.work_dir, constants.DESCRIPTION_F)
+    crmutils.str2file(out_string, description_f)
+
+
+def print_logseg(log_file: str, from_time: float, to_time: float) -> str:
+    """
+    Print the log segment specified by the given timestamps
+    """
+    data = read_from_file(log_file)
+    if not data:
+        return ""
+
+    from_line = 1 if from_time == 0 else findln_by_timestamp(data, from_time)
+    to_line = len(data.split('\n')) if to_time == 0 else findln_by_timestamp(data, to_time)
+
+    if from_line is None or to_line is None:
+        return ""
+
+    logger.debug2("Including segment [%d-%d] from %s", from_line, to_line, log_file)
+    return filter_lines(data, from_line, to_line)
+
+
+def tail(n: int, indata: str) -> List[str]:
+    return indata.split('\n')[-n:]
+
+
+def txt_diff(file1: str, file2: str) -> Tuple[int, str]:
+    cmd = f"diff -bBu {file1} {file2}"
+    rc, out, _ = ShellUtils().get_stdout_stderr(cmd)
+    return rc, out
+
+
+class Sanitizer:
+    """
+    A class containing methods for sanitizing sensitive data in CIB and PE files
+    """
+    DEFAULT_RULE_LIST = ["passw.*"]
+
+    def __init__(self, context: core.Context) -> None:
+        self.file_list_in_workdir = []
+        self.context = context
+        self.cib_data = None
+        self.sensitive_regex_set = set()
+        self.sensitive_value_list_with_raw_option = []
+        self.sensitive_value_list = []
+        self.sensitive_key_list = []
+
+    def prepare(self) -> None:
+        """
+        Prepare the data and files for the sanitization process
+        """
+        self._load_cib_from_work_dir()
+        self._parse_sensitive_set()
+        self._extract_sensitive_value_list()
+
+        if self._include_sensitive_data():
+            if not self.context.sanitize:
+                logger.warning("Some PE/CIB/log files contain possibly sensitive data")
+                logger.warning("Using \"-s\" option can replace sensitive data")
+                return
+            self._get_file_list_in_work_dir()
+        else:
+            self.context.sanitize = False
+
+    def _include_sensitive_data(self) -> bool:
+        """
+        Check whether the collected CIB data contains sensitive values
+        """
+        return bool(self.sensitive_value_list_with_raw_option or self.sensitive_value_list)
+
+    def _get_file_list_in_work_dir(self) -> None:
+        """
+        Get all files in work directory
+        """
+        for dirpath, dirnames, filenames in os.walk(self.context.work_dir):
+            for _file in filenames:
+                self.file_list_in_workdir.append(os.path.join(dirpath, _file))
+
+    def _load_cib_from_work_dir(self) -> None:
+        """
+        Load CIB data from the working directory
+        """
+        cib_file_list = glob.glob(f"{self.context.work_dir}/*/{constants.CIB_F}")
+        if not cib_file_list:
+            raise ReportGenericError(f"CIB file {constants.CIB_F} was not collected")
+        data = read_from_file(cib_file_list[0])
+        if not data:
+            raise ReportGenericError(f"File {cib_file_list[0]} is empty")
+        self.cib_data = data
+
+    def _parse_sensitive_set(self) -> None:
+        """
+        Parse sensitive regex from the -p option and config.report.sanitize_rule
+        """
+        # from command line option -p
+        patt_set = set(self.context.sensitive_regex_list)
+        # from /etc/crm/crm.conf
+        if config.report.sanitize_rule:
+            patt_set |= set(re.split(r'\s*\|\s*|\s+', config.report.sanitize_rule.strip('|')))
+        if patt_set:
+            self.context.sanitize = True
+        # Not set from -p option and crm.conf, use default
+        else:
+            patt_set = set(Sanitizer.DEFAULT_RULE_LIST)
+        logger.debug2(f"Regex set to match sensitive data: {patt_set}")
+        self.sensitive_regex_set = patt_set
+
+    def 
_extract_sensitive_value_list(self) -> None: + """ + Extract sensitive value list from cib data + """ + for patt in self.sensitive_regex_set: + if ':' in patt: + rule, option = patt.split(':') + if option == 'raw': + self.sensitive_value_list_with_raw_option += self._extract_from_cib(rule) + else: + logger.warning(f"For sanitize pattern {patt}, option should be \"raw\"") + else: + self.sensitive_value_list += self._extract_from_cib(patt) + self.sensitive_key_list.append(patt.strip('.*?')+'.*?') + + def _extract_from_cib(self, rule:str) -> List[str]: + name_patt = rule.strip('?')+'?' + value_list = re.findall(f'name="({name_patt})" value="(.*?)"', self.cib_data) + return [value[1] for value in value_list] + + def _sub_sensitive_string(self, data: str) -> str: + """ + Do the replacement job + + For the raw sanitize pattern, replace exactly the value + For the key:value nvpair sanitize pattern, replace the value in which line contain the key + """ + result = data + replace_raw_n: int = 0 + replace_n: int = 0 + + if self.sensitive_value_list_with_raw_option: + patt_str = '|'.join(self.sensitive_value_list_with_raw_option) + result, replace_raw_n = re.subn(r'\b({})\b'.format(patt_str), "******", data) + if self.sensitive_value_list: + key_str = '|'.join(self.sensitive_key_list) + patt_str = '|'.join(self.sensitive_value_list) + result, replace_n = re.subn(f'({key_str})({patt_str})', '\\1******', result) + + return "" if (replace_raw_n == 0 and replace_n == 0) else result + + + def sanitize(self) -> None: + """ + Replace and overwrite files containing sensitive data + """ + if not self.context.sanitize: + return + for f in self.file_list_in_workdir: + data = read_from_file(f) + if not data: + continue + replaced_str = self._sub_sensitive_string(data) + if replaced_str: + logger.debug("Replace sensitive info for %s", f) + write_to_file(replaced_str, f) + + +def do_sanitize(context: core.Context) -> None: + """ + Perform sanitization by replacing sensitive information in CIB/PE/other logs data with '*' + """ + inst = Sanitizer(context) + inst.prepare() + inst.sanitize() + + +class Package: + """ + A class to retrieve package versions and verify packages + on various distros + """ + def __init__(self, packages: str) -> None: + self.pkg_type = get_pkg_mgr() + self.packages = packages + + def pkg_ver_deb(self) -> str: + cmd = f"dpkg-query -W -f='${{Package}} ${{Version}}.${{Architecture}}\n' {self.packages}" + _, out, _ = ShellUtils().get_stdout_stderr(cmd) + return '\n'.join([line for line in out.splitlines() if "no packages found" not in line]) + + def pkg_ver_rpm(self) -> str: + _, out, _ = ShellUtils().get_stdout_stderr(f"rpm -q {self.packages}") + return '\n'.join([line for line in out.splitlines() if "not installed" not in line]) + + def version(self) -> str: + if not self.pkg_type: + return "" + return getattr(self, f"pkg_ver_{self.pkg_type}")() + + def verify_deb(self) -> str: + cmd = f"dpkg --verify {self.packages}" + _, out, _ = ShellUtils().get_stdout_stderr(cmd) + return '\n'.join([line for line in out.splitlines() if "not installed" not in line]) + + def verify_rpm(self) -> str: + cmd = f"rpm --verify {self.packages}" + _, out, _ = ShellUtils().get_stdout_stderr(cmd) + return '\n'.join([line for line in out.splitlines() if "not installed" not in line]) + + def verify(self) -> str: + if not self.pkg_type: + return "" + return getattr(self, f"verify_{self.pkg_type}")() + + +def get_open_method(infile): + """ + Get the appropriate file open method based on the file extension + """ + 
file_type_open_dict = { + "gz": gzip.open, + "bz2": bz2.open, + "xz": lzma.open + } + file_ext = infile.split('.')[-1] + return file_type_open_dict.get(file_ext, open) + + +def read_from_file(infile: str) -> str: + """ + Read content from a file + """ + _open = get_open_method(infile) + try: + with _open(infile, 'rt', encoding='utf-8', errors='replace') as f: + data = f.read() + except Exception as err: + logger.error("When reading file \"%s\": %s", infile, str(err)) + return "" + + return crmutils.to_ascii(data) + + +def write_to_file(data: str, tofile: str) -> None: + _open = get_open_method(tofile) + with _open(tofile, 'w') as f: + if _open == open: + f.write(data) + else: + f.write(data.encode('utf-8')) + + +def parse_to_timestamp(time: str) -> Optional[float]: + """ + Parses the input time string and converts it to a timestamp + """ + time_format_mapping = { + 'Y': 365, # 1 year is approximately 365 days + 'm': 30, # 1 month is approximately 30 days + 'd': 1, + 'H': 1 / 24, # 1 hour is 1/24 of a day + 'M': 1 / 1440 # 1 minute is 1/1440 of a day + } + + # Match the input time string to the format + match = re.match(r'^-?([1-9][0-9]*)([YmdHM])$', time) + + if not match: + res = crmutils.parse_to_timestamp(time, quiet=True) + if res: + return res + logger.error(f"Invalid time string '{time}'") + logger.error('Try these formats like: 2pm; "2019/9/5 12:30"; "09-Sep-07 2:00"; "[1-9][0-9]*[YmdHM]"') + raise ReportGenericError + + number_str, flag = match.groups() + number = int(number_str) * time_format_mapping[flag] + timedelta = datetime.timedelta(days=number) + + # Calculate the timestamp + timestamp = (datetime.datetime.now() - timedelta).timestamp() + + return timestamp + + +def ts_to_str(timestamp: float) -> str: + """ + Convert timestamp to date string + """ + return dt_to_str(ts_to_dt(timestamp)) + + +def ts_to_dt(timestamp: float) -> datetime.datetime: + """ + Convert timestamp to datetime.datetime object, consider utc offset + """ + dt = crmutils.timestamp_to_datetime(timestamp) + dt += tz.tzlocal().utcoffset(dt) + return dt + + +def dt_to_str(dt: datetime.datetime, form: str = constants.TIME_FORMAT) -> str: + return dt.strftime(form) + + +def now(form: str = constants.TIME_FORMAT) -> str: + return dt_to_str(datetime.datetime.now(), form=form) + + +def get_cmd_output(cmd: str, timeout: int = None) -> str: + """ + Get the output of a command, include stdout and stderr + """ + out_str = "" + _, out, err = ShellUtils().get_stdout_stderr(cmd, timeout=timeout) + if out: + out_str += f"{out}\n" + if err: + out_str += f"{err}\n" + return out_str + + +def get_timespan_str(context: core.Context) -> str: + from_time_str = ts_to_str(context.from_time) + to_time_str = ts_to_str(context.to_time) + return f"{from_time_str} - {to_time_str}" + + +def print_traceback(): + traceback.print_exc() + sys.stdout.flush() + + +def real_path(target_file: str) -> str: + return '/'.join(target_file.split('/')[3:]) +# vim:ts=4:sw=4:et: diff --git a/crmsh/sh.py b/crmsh/sh.py index dba3bad91c..1571d39e2e 100644 --- a/crmsh/sh.py +++ b/crmsh/sh.py @@ -434,7 +434,7 @@ def get_stdout(cls, cmd, input_s=None, stderr_on=True, shell=True, raw=False): return proc.returncode, stdout_data.strip() @classmethod - def get_stdout_stderr(cls, cmd, input_s=None, shell=True, raw=False, no_reg=False): + def get_stdout_stderr(cls, cmd, input_s=None, shell=True, raw=False, no_reg=False, timeout=None): ''' Run a cmd, return (rc, stdout, stderr) ''' @@ -446,7 +446,8 @@ def get_stdout_stderr(cls, cmd, input_s=None, shell=True, 
raw=False, no_reg=Fals stdin=input_s and subprocess.PIPE or None, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - stdout_data, stderr_data = proc.communicate(input_s) + # will raise subprocess.TimeoutExpired if set timeout + stdout_data, stderr_data = proc.communicate(input_s, timeout=timeout) if raw: return proc.returncode, stdout_data, stderr_data else: diff --git a/crmsh/utils.py b/crmsh/utils.py index 1ff671289b..91c99d3855 100644 --- a/crmsh/utils.py +++ b/crmsh/utils.py @@ -3101,4 +3101,7 @@ def parse_user_at_host(s: str): else: return s[:i], s[i+1:] + +def file_is_empty(file: str) -> bool: + return os.stat(file).st_size == 0 # vim:ts=4:sw=4:et: diff --git a/scripts/health/collect.py b/scripts/health/collect.py index 9b777be693..a64fc67986 100755 --- a/scripts/health/collect.py +++ b/scripts/health/collect.py @@ -6,7 +6,7 @@ import hashlib import platform import crm_script -from crmsh.report import utillib +from crmsh.report import utils data = crm_script.get_input() PACKAGES = ['booth', 'cluster-glue', 'corosync', 'crmsh', 'csync2', 'drbd', @@ -34,7 +34,7 @@ def sys_info(): # the number of currently running processes and the total number of # processes. The last column displays the last process ID used. system, node, release, version, machine, processor = platform.uname() - distname = utillib.get_distro_info() + distname = utils.get_distro_info() hostname = os.uname()[1] uptime = open('/proc/uptime').read().split() From 5aeb7686cb976b4935a3c7b5796eaf788a30f63e Mon Sep 17 00:00:00 2001 From: xin liang Date: Sun, 8 Oct 2023 14:38:41 +0800 Subject: [PATCH 05/14] Dev: doc: Unify contents of manpage and help info --- crmsh/help.py | 3 +- crmsh/report/constants.py | 6 + crmsh/report/core.py | 21 +- doc/crm.8.adoc | 31 +-- doc/crmsh_crm_report.8.adoc | 468 +----------------------------------- 5 files changed, 30 insertions(+), 499 deletions(-) diff --git a/crmsh/help.py b/crmsh/help.py index 7c2e76c59f..5ac08fd7d3 100644 --- a/crmsh/help.py +++ b/crmsh/help.py @@ -96,7 +96,8 @@ def paginate(self): short_help = clidisplay.help_header(self.short) if self.from_cli and self.level and self.name: - _, output, _ = ShellUtils().get_stdout_stderr(f"crm {self.level} {self.name} --help-without-redirect") + level = '' if self.level == 'root' else self.level + _, output, _ = ShellUtils().get_stdout_stderr(f"crm {level} {self.name} --help-without-redirect") page_string(short_help + '\n\n'+ output) return diff --git a/crmsh/report/constants.py b/crmsh/report/constants.py index 8d7f987c0f..e80b5c70d0 100644 --- a/crmsh/report/constants.py +++ b/crmsh/report/constants.py @@ -64,6 +64,12 @@ NAME = "crm report" COROSYNC_LIB = "/var/lib/corosync" +DESCRIPTION_HELP = '''Description: +crm report is a utility to collect all information (logs, +configuration files, system information, etc) relevant to +Pacemaker (CRM) over the given period of time. 
+'''
+
 EXTRA_HELP = '''
 Examples
   # collect from 2pm, today
diff --git a/crmsh/report/core.py b/crmsh/report/core.py
index f2ef23cc4b..3582ff47ab 100644
--- a/crmsh/report/core.py
+++ b/crmsh/report/core.py
@@ -78,18 +78,28 @@ def __setitem__(self, key: str, value) -> None:
 from crmsh.report import constants, utils, collect
 
 
+class CapitalizedHelpFormatter(argparse.HelpFormatter):
+    def add_usage(self, usage, actions, groups, prefix=None):
+        if prefix is None:
+            prefix = 'Usage: '
+        return super().add_usage(usage.capitalize(), actions, groups, prefix)
+
+    def start_section(self, heading):
+        return super().start_section(heading.capitalize())
+
+
 def add_arguments() -> argparse.Namespace:
     parser = argparse.ArgumentParser(
-        usage=f"{constants.NAME} [options] [dest]",
+        usage=f"\n{constants.NAME} [options] [dest]",
         add_help=False,
-        formatter_class=lambda prog: argparse.HelpFormatter(prog, width=80)
+        formatter_class=lambda prog: CapitalizedHelpFormatter(prog, width=80)
     )
     parser.add_argument("-h", "--help", action="store_true", dest="help",
                         help="Show this help message and exit")
     parser.add_argument('-f', dest='from_time', metavar='FROM_TIME',
-                        help='Time to start from (default: 12 hours before)')
+                        help='Time to start from (default: 12 hours ago); can be a specific time or a delta time until now')
     parser.add_argument('-t', dest='to_time', metavar='TO_TIME',
-                        help='Time to finish at (default: now)')
+                        help='Time to finish at (default: now); can be a specific time or a delta time until now')
     parser.add_argument('-d', dest='no_compress', action='store_true',
                         help="Don't compress, but leave result in a directory")
     parser.add_argument('-n', dest='node_list', metavar='NODE', action=ui_cluster.CustomAppendAction, default=[],
@@ -115,10 +125,11 @@ def add_arguments() -> argparse.Namespace:
     parser.add_argument('-v', dest='debug', action='count', default=0,
                         help='Increase verbosity')
     parser.add_argument('dest', nargs='?',
-                        help='Report name (may include path where to store the report)')
+                        help="Report name (which may include the path for storing the report); default format is 'crm_report-current_date', such as 'crm_report-Mon-09-Oct-2023'")
 
     args = parser.parse_args()
     if args.help:
+        print(constants.DESCRIPTION_HELP)
         parser.print_help()
         print(constants.EXTRA_HELP)
         sys.exit(0)
diff --git a/doc/crm.8.adoc b/doc/crm.8.adoc
index 1219dd507a..da9fb388fb 100644
--- a/doc/crm.8.adoc
+++ b/doc/crm.8.adoc
@@ -1,5 +1,5 @@
 :man source: crm
-:man version: 4.0.0
+:man version: 4.6.0
 :man manual: crmsh documentation
 
 crm(8)
@@ -5035,34 +5035,9 @@ wdiff 2066 2067
 wdiff pe-input-2080.bz2 live
 status
 ...............
-[[cmdhelp_root_report,Create cluster status report]]
+[[cmdhelp_root_report,Create cluster status report,From Code]]
 === `report`
-
-Interface to a tool for creating a cluster report. A report is an
-archive containing log files, configuration files, system information
-and other relevant data for a given time period. This is a useful tool
-for collecting data to attach to bug reports, or for detecting the
-root cause of errors resulting in resource failover, for example.
-
-See `crmsh_crm_report(8)` for more details on arguments,
-or call `crm report -h`
-
-Usage:
-...............
-report -f {time|"cts:"testnum} [-t time] [-u user] [-l file]
-       [-n nodes] [-E files] [-p patt] [-L patt] [-e prog]
-       [-MSDZAVsvhd] [dest]
-...............
-
-Examples:
-...............
-report -f 2pm report_1 -report -f "2007/9/5 12:30" -t "2007/9/5 14:00" report_2 -report -f 1:00 -t 3:00 -l /var/log/cluster/ha-debug report_3 -report -f "09sep07 2:00" -u hbadmin report_4 -report -f 18:00 -p "usern.*" -p "admin.*" report_5 -report -f cts:133 ctstest_133 -............... +See "crm help report" or "crm report --help" === `end` (`cd`, `up`) diff --git a/doc/crmsh_crm_report.8.adoc b/doc/crmsh_crm_report.8.adoc index 98ef0a77d0..9e5e91c02e 100644 --- a/doc/crmsh_crm_report.8.adoc +++ b/doc/crmsh_crm_report.8.adoc @@ -1,6 +1,6 @@ :man source: crmsh_crm_report -:man version: 1.2 -:man manual: Pacemaker documentation +:man version: 4.6.0 +:man manual: crmsh documentation crmsh_crm_report(8) ================== @@ -10,468 +10,6 @@ NAME crmsh_crm_report - create report for CRM based clusters (Pacemaker) -SYNOPSIS --------- -*crm report* -f {time|"cts:"testnum} [-t time] [-u user] [-l file] - [-n nodes] [-E files] [-p patt] [-L patt] [-e prog] - [-MSDCZAQVsvhd] [dest] - - -DESCRIPTION ------------ -The crmsh_crm_report(8) is a utility to collect all information (logs, -configuration files, system information, etc) relevant to -Pacemaker (CRM) over the given period of time. - - -OPTIONS -------- -dest:: - The report name. It can also contain a path where to put the - report tarball. If left out, the tarball is created in the - current directory named "crm_report-current_date", for instance - crm_report-Wed-03-Mar-2010. - -*-d*:: - Don't create the compressed tar, but leave the result in a - directory. - -*-f* { time | "cts:"testnum }:: - The start time from which to collect logs. The time is in the - format as used by the Date::Parse perl module. For cts tests, - specify the "cts:" string followed by the test number. This - option is required. - -*-t* time:: - The end time to which to collect logs. Defaults to now. - -*-n* nodes:: - A list of space separated hostnames (cluster members). - crm report may try to find out the set of nodes by itself, but - if it runs on the loghost which, as it is usually the case, - does not belong to the cluster, that may be difficult. Also, - OpenAIS doesn't contain a list of nodes and if Pacemaker is - not running, there is no way to find it out automatically. - This option is cumulative (i.e. use -n "a b" or -n a -n b). - -*-l* file:: - Log file location. If, for whatever reason, crm report cannot - find the log files, you can specify its absolute path. - -*-E* files:: - Extra log files to collect. This option is cumulative. By - default, /var/log/messages are collected along with the - cluster logs. - -*-M*:: - Don't collect extra log files, but only the file containing - messages from the cluster subsystems. - -*-L* patt:: - A list of regular expressions to match in log files for - analysis. This option is additive (default: "CRIT: ERROR:"). - -*-p* patt:: - Additional patterns to match parameter name which contain - sensitive information. This option is additive (default: "passw.*"). - -*-Q*:: - Quick run. Gathering some system information can be expensive. - With this option, such operations are skipped and thus - information collecting sped up. The operations considered - I/O or CPU intensive: verifying installed packages content, - sanitizing files for sensitive information, and producing dot - files from PE inputs. - -*-A*:: - This is an OpenAIS cluster. `crm report` has some heuristics to - find the cluster stack, but that is not always reliable. - By default, `crm report` assumes that it is run on a Heartbeat - cluster. 
- -*-u* user:: - The ssh user. `crm report` will try to login to other nodes - without specifying a user, then as "root", and finally as - "hacluster". If you have another user for administration over - ssh, please use this option. - -*-X* ssh-options:: - Extra ssh options. These will be added to every ssh - invocation. Alternatively, use `$HOME/.ssh/config` to setup - desired ssh connection options. - -*-S*:: - Single node operation. Run `crm report` only on this node and - don't try to start slave collectors on other members of the - cluster. Under normal circumstances this option is not - needed. Use if ssh(1) does not work to other nodes. - -*-Z*:: - If the destination directory exist, remove it instead of - exiting (this is default for CTS). - -*-V*:: - Print the version including the last repository changeset. - -*-v*:: - Increase verbosity. Normally used to debug unexpected - behaviour. - -*-h*:: - Show usage and some examples. - -*-D* (obsolete):: - Don't invoke editor to fill the description text file. - -*-e* prog (obsolete):: - Your favourite text editor. Defaults to $EDITOR, vim, vi, - emacs, or nano, whichever is found first. - -*-C* (obsolete):: - Remove the destination directory once the report has been put - in a tarball. - -EXAMPLES --------- -Last night during the backup there were several warnings -encountered (logserver is the log host): - - logserver# crm report -f 3:00 -t 4:00 -n "node1 node2" report - -collects everything from all nodes from 3am to 4am last night. -The files are compressed to a tarball report.tar.bz2. - -Just found a problem during testing: - - # note the current time - node1# date - Fri Sep 11 18:51:40 CEST 2009 - node1# /etc/init.d/heartbeat start - node1# nasty-command-that-breaks-things - node1# sleep 120 #wait for the cluster to settle - node1# crm report -f 18:51 hb1 - - # if crm report can't figure out that this is corosync - node1# crm report -f 18:51 -A hb1 - - # if crm report can't figure out the cluster members - node1# crm report -f 18:51 -n "node1 node2" hb1 - -The files are compressed to a tarball hb1.tar.bz2. - -INTERPRETING RESULTS --------------------- -The compressed tar archive is the final product of `crm report`. -This is one example of its content, for a CTS test case on a -three node OpenAIS cluster: - - $ ls -RF 001-Restart - - 001-Restart: - analysis.txt events.txt logd.cf s390vm13/ s390vm16/ - description.txt ha-log.txt openais.conf s390vm14/ - - 001-Restart/s390vm13: - STOPPED crm_verify.txt hb_uuid.txt openais.conf@ sysinfo.txt - cib.txt dlm_dump.txt logd.cf@ pengine/ sysstats.txt - cib.xml events.txt messages permissions.txt - - 001-Restart/s390vm13/pengine: - pe-input-738.bz2 pe-input-740.bz2 pe-warn-450.bz2 - pe-input-739.bz2 pe-warn-449.bz2 pe-warn-451.bz2 - - 001-Restart/s390vm14: - STOPPED crm_verify.txt hb_uuid.txt openais.conf@ sysstats.txt - cib.txt dlm_dump.txt logd.cf@ permissions.txt - cib.xml events.txt messages sysinfo.txt - - 001-Restart/s390vm16: - STOPPED crm_verify.txt hb_uuid.txt messages sysinfo.txt - cib.txt dlm_dump.txt hostcache openais.conf@ sysstats.txt - cib.xml events.txt logd.cf@ permissions.txt - -The top directory contains information which pertains to the -cluster or event as a whole. Files with exactly the same content -on all nodes will also be at the top, with per-node links created -(as it is in this example the case with openais.conf and logd.cf). - -The cluster log files are named ha-log.txt regardless of the -actual log file name on the system. 
If it is found on the -loghost, then it is placed in the top directory. If not, the top -directory ha-log.txt contains all nodes logs merged and sorted by -time. Files named messages are excerpts of /var/log/messages from -nodes. - -Most files are copied verbatim or they contain output of a -command. For instance, cib.xml is a copy of the CIB found in -/var/lib/heartbeat/crm/cib.xml. crm_verify.txt is output of the -crm_verify(8) program. - -Some files are result of a more involved processing: - - *analysis.txt*:: - A set of log messages matching user defined patterns (may be - provided with the -L option). - - *events.txt*:: - A set of log messages matching event patterns. It should - provide information about major cluster motions without - unnecessary details. These patterns are devised by the - cluster experts. Currently, the patterns cover membership - and quorum changes, resource starts and stops, fencing - (stonith) actions, and cluster starts and stops. events.txt - is always generated for each node. In case the central - cluster log was found, also combined for all nodes. - - *permissions.txt*:: - One of the more common problem causes are file and directory - permissions. `crm report` looks for a set of predefined - directories and checks their permissions. Any issues are - reported here. - - *backtraces.txt*:: - gdb generated backtrace information for cores dumped - within the specified period. - - *sysinfo.txt*:: - Various release information about the platform, kernel, - operating system, packages, and anything else deemed to be - relevant. The static part of the system. - - *sysstats.txt*:: - Output of various system commands such as ps(1), uptime(1), - netstat(8), and ip(8). The dynamic part of the system. - -description.txt should contain a user supplied description of the -problem, but since it is very seldom used, it will be dropped -from the future releases. - -PREREQUISITES -------------- - -ssh:: - It is not strictly required, but you won't regret having a - password-less ssh. It is not too difficult to setup and will save - you a lot of time. If you can't have it, for example because your - security policy does not allow such a thing, or you just prefer - menial work, then you will have to resort to the semi-manual - semi-automated report generation. See below for instructions. - + - If you need to supply a password for your passphrase/login, then - always use the `-u` option. - + - For extra ssh(1) options, if you're too lazy to setup - $HOME/.ssh/config, use the `-X` option. Do not forget to put - the options in quotes. - -sudo:: - If the ssh user (as specified with the `-u` option) is other - than `root`, then `crm report` uses `sudo` to collect the - information which is readable only by the `root` user. In that - case it is required to setup the `sudoers` file properly. The - user (or group to which the user belongs) should have the - following line: - + - ALL = NOPASSWD: /usr/sbin/crm - + - See the `sudoers(5)` man page for more details. - -Times:: - In order to find files and messages in the given period and to - parse the `-f` and `-t` options, `crm report` uses perl and one of the - `Date::Parse` or `Date::Manip` perl modules. Note that you need - only one of these. Furthermore, on nodes which have no logs and - where you don't run `crm report` directly, no date parsing is - necessary. In other words, if you run this on a loghost then you - don't need these perl modules on the cluster nodes. 
- + - On rpm based distributions, you can find `Date::Parse` in - `perl-TimeDate` and on Debian and its derivatives in - `libtimedate-perl`. - -Core dumps:: - To backtrace core dumps gdb is needed and the packages with - the debugging info. The debug info packages may be installed - at the time the report is created. Let's hope that you will - need this really seldom. - -TIMES ------ - -Specifying times can at times be a nuisance. That is why we have -chosen to use one of the perl modules--they do allow certain -freedom when talking dates. You can either read the instructions -at the -http://search.cpan.org/dist/TimeDate/lib/Date/Parse.pm#EXAMPLE_DATES[Date::Parse -examples page]. -or just rely on common sense and try stuff like: - - 3:00 (today at 3am) - 15:00 (today at 3pm) - 2007/9/1 2pm (September 1st at 2pm) - Tue Sep 15 20:46:27 CEST 2009 (September 15th etc) - -`crm report` will (probably) complain if it can't figure out what do -you mean. - -Try to delimit the event as close as possible in order to reduce -the size of the report, but still leaving a minute or two around -for good measure. - -`-f` is not optional. And don't forget to quote dates when they -contain spaces. - - -Should I send all this to the rest of Internet? ------------------------------------------------ - -By default, the sensitive data in CIB and PE files is not mangled -by `crm report` because that makes PE input files mostly useless. -If you still have no other option but to send the report to a -public mailing list and do not want the sensitive data to be -included, use the `-s` option. Without this option, `crm report` -will issue a warning if it finds information which should not be -exposed. By default, parameters matching 'passw.*' are considered -sensitive. Use the `-p` option to specify additional regular -expressions to match variable names which may contain information -you don't want to leak. For example: - - # crm report -f 18:00 -p "user.*" -p "secret.*" /var/tmp/report - -Heartbeat's ha.cf is always sanitized. Logs and other files are -not filtered. - -LOGS ----- - -It may be tricky to find syslog logs. The scheme used is to log a -unique message on all nodes and then look it up in the usual -syslog locations. This procedure is not foolproof, in particular -if the syslog files are in a non-standard directory. We look in -/var/log /var/logs /var/syslog /var/adm /var/log/ha -/var/log/cluster. In case we can't find the logs, please supply -their location: - - # crm report -f 5pm -l /var/log/cluster1/ha-log -S /tmp/report_node1 - -If you have different log locations on different nodes, well, -perhaps you'd like to make them the same and make life easier for -everybody. - -Files starting with "ha-" are preferred. In case syslog sends -messages to more than one file, if one of them is named ha-log or -ha-debug those will be favoured over syslog or messages. - -`crm report` supports also archived logs in case the period -specified extends that far in the past. The archives must reside -in the same directory as the current log and their names must -be prefixed with the name of the current log (syslog-1.gz or -messages-20090105.bz2). - -If there is no separate log for the cluster, possibly unrelated -messages from other programs are included. We don't filter logs, -but just pick a segment for the period you specified. - -MANUAL REPORT COLLECTION ------------------------- - -So, your ssh doesn't work. In that case, you will have to run -this procedure on all nodes. 
Use `-S` so that `crm report` doesn't -bother with ssh: - - # crm report -f 5:20pm -t 5:30pm -S /tmp/report_node1 - -If you also have a log host which is not in the cluster, then -you'll have to copy the log to one of the nodes and tell us where -it is: - - # crm report -f 5:20pm -t 5:30pm -l /var/tmp/ha-log -S /tmp/report_node1 - -OPERATION ---------- -`crm report` collects files and other information in a fairly -straightforward way. The most complex tasks are discovering the -log file locations (if syslog is used which is the most common -case) and coordinating the operation on multiple nodes. - -The instance of `crm report` running on the host where it was -invoked is the master instance. Instances running on other nodes -are slave instances. The master instance communicates with slave -instances by ssh. There are multiple ssh invocations per run, so -it is essential that the ssh works without password, i.e. with -the public key authentication and authorized_keys. - -The operation consists of three phases. Each phase must finish -on all nodes before the next one can commence. The first phase -consists of logging unique messages through syslog on all nodes. -This is the shortest of all phases. - -The second phase is the most involved. During this phase all -local information is collected, which includes: - -- logs (both current and archived if the start time is far in the past) -- various configuration files (corosync, heartbeat, logd) -- the CIB (both as xml and as represented by the crm shell) -- pengine inputs (if this node was the DC at any point in - time over the given period) -- system information and status -- package information and status -- dlm lock information -- backtraces (if there were core dumps) - -The third phase is collecting information from all nodes and -analyzing it. The analyzis consists of the following tasks: - -- identify files equal on all nodes which may then be moved to - the top directory -- save log messages matching user defined patterns - (defaults to ERRORs and CRITical conditions) -- report if there were coredumps and by whom -- report crm_verify(8) results -- save log messages matching major events to events.txt -- in case logging is configured without loghost, node logs and - events files are combined using a perl utility - - -BUGS ----- -Finding logs may at times be extremely difficult, depending on -how weird the syslog configuration. It would be nice to ask -syslog-ng developers to provide a way to find out the log -destination based on facility and priority. - -If you think you found a bug, please rerun with the -v option and -attach the output to bugzilla. - -`crm report` can function in a satisfactory way only if ssh works to -all nodes using authorized_keys (without password). - -There are way too many options. - - -AUTHOR ------- -Written by Dejan Muhamedagic, - - -RESOURCES ---------- -ClusterLabs: - -Heartbeat and other Linux HA resources: - -OpenAIS: - -Corosync: - - SEE ALSO -------- -crm(8), Date::Parse(3) - - -COPYING -------- -Copyright \(C) 2007-2009 Dejan Muhamedagic. Free use of this -software is granted under the terms of the GNU General Public License (GPL). 
- +See "crm help report" or "crm report --help" From 61e5796d3036653c672e53646e352251d66b903f Mon Sep 17 00:00:00 2001 From: xin liang Date: Fri, 13 Oct 2023 17:25:53 +0800 Subject: [PATCH 06/14] Dev: behave: Adjust functional behave test for previous changes --- .github/workflows/crmsh-ci.yml | 15 ++- data-manifest | 1 + test/features/bootstrap_bugs.feature | 16 +-- .../bootstrap_init_join_remove.feature | 2 + test/features/bootstrap_options.feature | 6 + test/features/crm_report_bugs.feature | 60 +++------- test/features/crm_report_normal.feature | 108 ++++++++++++++++++ test/features/qdevice_setup_remove.feature | 1 + test/features/qdevice_usercase.feature | 2 + test/features/steps/step_implementation.py | 32 +++++- 10 files changed, 188 insertions(+), 55 deletions(-) create mode 100644 test/features/crm_report_normal.feature diff --git a/.github/workflows/crmsh-ci.yml b/.github/workflows/crmsh-ci.yml index 44e20a950e..f58ebd9a97 100644 --- a/.github/workflows/crmsh-ci.yml +++ b/.github/workflows/crmsh-ci.yml @@ -54,7 +54,7 @@ jobs: timeout-minutes: 40 steps: - uses: actions/checkout@v3 - - name: functional test for crm_report + - name: functional test for crm_report bugs run: | echo '{ "exec-opts": ["native.cgroupdriver=systemd"] }' | sudo tee /etc/docker/daemon.json sudo systemctl restart docker.service @@ -62,6 +62,19 @@ jobs: $DOCKER_SCRIPT $index && $DOCKER_SCRIPT -d && $DOCKER_SCRIPT $index -u - uses: codecov/codecov-action@v3 + functional_test_crm_report_normal: + runs-on: ubuntu-20.04 + timeout-minutes: 40 + steps: + - uses: actions/checkout@v3 + - name: functional test for crm_report normal + run: | + echo '{ "exec-opts": ["native.cgroupdriver=systemd"] }' | sudo tee /etc/docker/daemon.json + sudo systemctl restart docker.service + index=`$GET_INDEX_OF crm_report_normal` + $DOCKER_SCRIPT $index && $DOCKER_SCRIPT -d && $DOCKER_SCRIPT $index -u + - uses: codecov/codecov-action@v3 + functional_test_bootstrap_bugs: runs-on: ubuntu-20.04 timeout-minutes: 40 diff --git a/data-manifest b/data-manifest index 5ab24f5b3a..b6d65790f5 100644 --- a/data-manifest +++ b/data-manifest @@ -75,6 +75,7 @@ test/features/configure_bugs.feature test/features/constraints_bugs.feature test/features/coveragerc test/features/crm_report_bugs.feature +test/features/crm_report_normal.feature test/features/environment.py test/features/geo_setup.feature test/features/healthcheck.feature diff --git a/test/features/bootstrap_bugs.feature b/test/features/bootstrap_bugs.feature index 6e2dcc6508..bd5e102f79 100644 --- a/test/features/bootstrap_bugs.feature +++ b/test/features/bootstrap_bugs.feature @@ -19,21 +19,21 @@ Feature: Regression test for bootstrap bugs Then Got output "default" @clean - Scenario: Space value not allowed for option(bsc#1141976) + Scenario: Empty value not allowed for option(bsc#1141976) When Try "crm -c ' '" - Then Except "ERROR: Space value not allowed for dest "cib"" + Then Except "ERROR: Empty value not allowed for dest "cib"" When Try "crm cluster init --name ' '" - Then Except "ERROR: cluster.init: Space value not allowed for dest "cluster_name"" + Then Except "ERROR: cluster.init: Empty value not allowed for dest "cluster_name"" When Try "crm cluster join -c ' '" - Then Except "ERROR: cluster.join: Space value not allowed for dest "cluster_node"" + Then Except "ERROR: cluster.join: Empty value not allowed for dest "cluster_node"" When Try "crm cluster remove -c ' '" - Then Except "ERROR: cluster.remove: Space value not allowed for dest "cluster_node"" + Then Except "ERROR: 
cluster.remove: Empty value not allowed for dest "cluster_node"" When Try "crm cluster geo_init -a ' '" - Then Except "ERROR: cluster.geo_init: Space value not allowed for dest "arbitrator"" + Then Except "ERROR: cluster.geo_init: Empty value not allowed for dest "arbitrator"" When Try "crm cluster geo_join -c ' '" - Then Except "ERROR: cluster.geo_join: Space value not allowed for dest "cluster_node"" + Then Except "ERROR: cluster.geo_join: Empty value not allowed for dest "cluster_node"" When Try "crm cluster geo_init_arbitrator -c ' '" - Then Except "ERROR: cluster.geo_init_arbitrator: Space value not allowed for dest "cluster_node"" + Then Except "ERROR: cluster.geo_init_arbitrator: Empty value not allowed for dest "cluster_node"" @clean Scenario: Setup cluster with crossed network(udpu only) diff --git a/test/features/bootstrap_init_join_remove.feature b/test/features/bootstrap_init_join_remove.feature index 4a1e924dd9..ed04525795 100644 --- a/test/features/bootstrap_init_join_remove.feature +++ b/test/features/bootstrap_init_join_remove.feature @@ -124,6 +124,8 @@ Feature: crmsh bootstrap process - init, join and remove Then Directory "/var/lib/pacemaker/cib/" is empty on "hanode2" Then Directory "/var/lib/pacemaker/pengine/" is empty on "hanode2" Then Directory "/var/lib/corosync/" is empty on "hanode2" + When Run "crm cluster remove hanode1 -y --force" on "hanode1" + Then File "/etc/corosync/corosync.conf" not exist on "hanode1" Scenario: Remove local node "hanode1" with `crm -F node delete` When Run "crm configure primitive d1 Dummy" on "hanode1" diff --git a/test/features/bootstrap_options.feature b/test/features/bootstrap_options.feature index 51fed6298b..5ccc052ee7 100644 --- a/test/features/bootstrap_options.feature +++ b/test/features/bootstrap_options.feature @@ -34,6 +34,12 @@ Feature: crmsh bootstrap process - options usage: init [options] [STAGE] crm: error: Duplicated input for '-i/--interface' option """ + When Try "crm cluster init sbd -x -y" on "hanode1" + Then Expected "-x option or SKIP_CSYNC2_SYNC can't be used with any stage" in stderr + When Try "crm cluster init -i eth0 -i eth1 -i eth2 -y" on "hanode1" + Then Expected "Maximum number of interface is 2" in stderr + When Try "crm cluster init sbd -N hanode1 -N hanode2 -y" on "hanode1" + Then Expected "Can't use -N/--nodes option and stage(sbd) together" in stderr @clean Scenario: Init whole cluster service on node "hanode1" using "--node" option diff --git a/test/features/crm_report_bugs.feature b/test/features/crm_report_bugs.feature index ed91012186..907ba5c7f8 100644 --- a/test/features/crm_report_bugs.feature +++ b/test/features/crm_report_bugs.feature @@ -15,6 +15,22 @@ Feature: crm report functional test for verifying bugs And Online nodes are "hanode1 hanode2" And Show cluster status on "hanode1" + @clean + Scenario: Verify crm report handle files contain non-utf-8 characters (bsc#1130715) + When Run "echo 'abc#$%%^' | iconv -f UTF-8 -t UTF-16 > /opt/text_non_utf8" on "hanode1" + Then This file "/opt/text_non_utf8" will trigger UnicodeDecodeError exception + When Run "crm report -E /opt/text_non_utf8 report1" on "hanode1" + Then File "text_non_utf8" in "report1.tar.bz2" + When Run "rm -f report1.tar.bz2" on "hanode1" + + @clean + Scenario: Compressed file ended before the end-of-stream marker was reached (bsc#1206606) + When Run "touch /var/log/pacemaker/pacemaker.log-20221220.xz" on "hanode1" + When Try "crm report report1" on "hanode1" + Then File "pacemaker.log" in "report1.tar.bz2" + And Expected "When 
reading file "/var/log/pacemaker/pacemaker.log-20221220.xz": Compressed file ended before the end-of-stream marker was reached" in stderr + When Run "rm -f report1.tar.bz2" on "hanode1" + @clean Scenario: Include archived logs(bsc#1148873) When Write multi lines to file "/var/log/log1" on "hanode1" @@ -114,47 +130,3 @@ Feature: crm report functional test for verifying bugs # found password Then Expected return code is "0" When Run "rm -rf report.tar.bz2 report" on "hanode1" - - @clean - Scenario: crm report collect trace ra log - When Run "crm configure primitive d Dummy" on "hanode1" - And Run "crm configure primitive d2 Dummy" on "hanode1" - Then Resource "d" is started on "hanode1" - And Resource "d2" is started on "hanode2" - When Run "crm resource trace d monitor" on "hanode1" - Then Expected "Trace for d:monitor is written to /var/lib/heartbeat/trace_ra/Dummy" in stdout - When Wait "10" seconds - And Run "crm resource untrace d" on "hanode1" - And Run "crm resource trace d2 monitor /trace_d" on "hanode1" - Then Expected "Trace for d2:monitor is written to /trace_d/Dummy" in stdout - When Wait "10" seconds - And Run "crm resource untrace d2" on "hanode1" - And Run "crm report report" on "hanode1" - Then Directory "trace_ra" in "report.tar.bz2" - And Directory "trace_d" in "report.tar.bz2" - When Run "rm -rf report.tar.bz2 report" on "hanode1" - - @clean - Scenario: Run script - When Run "crm script run health" on "hanode1" - When Run "crm script run virtual-ip id=vip_x ip=@vip.0" on "hanode1" - Then Resource "vip_x" type "IPaddr2" is "Started" - - @clean - Scenario: Run history - When Run "crm history info" on "hanode1" - When Run "crm history refresh" on "hanode1" - When Try "crm history peinputs|grep "pengine/pe-input-0"" - Then Expected return code is "0" - When Try "crm history info|grep "Nodes: hanode1 hanode2"" - Then Expected return code is "0" - When Run "crm configure primitive d100 Dummy" on "hanode1" - When Run "crm history refresh force" on "hanode1" - When Try "crm history info|grep "Resources: d100"" - Then Expected return code is "0" - Given Cluster service is "stopped" on "hanode3" - When Run "crm cluster join -c hanode1 -y" on "hanode3" - Then Cluster service is "started" on "hanode3" - When Run "crm history refresh force" on "hanode1" - When Try "crm history info|grep "Nodes: hanode1 hanode2 hanode3"" - Then Expected return code is "0" diff --git a/test/features/crm_report_normal.feature b/test/features/crm_report_normal.feature new file mode 100644 index 0000000000..a7db7f8abe --- /dev/null +++ b/test/features/crm_report_normal.feature @@ -0,0 +1,108 @@ +@crm_report +Feature: crm report functional test for common cases + + Tag @clean means need to stop cluster service if the service is available + Need nodes: hanode1 hanode2 hanode3 + + Background: Setup a two nodes cluster + Given Cluster service is "stopped" on "hanode1" + And Cluster service is "stopped" on "hanode2" + When Run "crm cluster init -y" on "hanode1" + Then Cluster service is "started" on "hanode1" + And Show cluster status on "hanode1" + When Run "crm cluster join -c hanode1 -y" on "hanode2" + Then Cluster service is "started" on "hanode2" + And Online nodes are "hanode1 hanode2" + And Show cluster status on "hanode1" + + @clean + Scenario: crm report collect trace ra log + When Run "crm configure primitive d Dummy" on "hanode1" + And Run "crm configure primitive d2 Dummy" on "hanode1" + Then Resource "d" is started on "hanode1" + And Resource "d2" is started on "hanode2" + When Run "crm resource 
trace d monitor" on "hanode1" + Then Expected "Trace for d:monitor is written to /var/lib/heartbeat/trace_ra/Dummy" in stdout + When Wait "10" seconds + And Run "crm resource untrace d" on "hanode1" + And Run "crm resource trace d2 monitor /trace_d" on "hanode1" + Then Expected "Trace for d2:monitor is written to /trace_d/Dummy" in stdout + When Wait "10" seconds + And Run "crm resource untrace d2" on "hanode1" + And Run "crm report report" on "hanode1" + Then Directory "trace_ra" in "report.tar.bz2" + And Directory "trace_d" in "report.tar.bz2" + When Run "rm -rf report.tar.bz2 report" on "hanode1" + + @clean + Scenario: Run history and script + When Run "crm history info" on "hanode1" + When Run "crm history refresh" on "hanode1" + When Try "crm history peinputs|grep "pengine/pe-input-0"" + Then Expected return code is "0" + When Try "crm history info|grep "Nodes: hanode1 hanode2"" + Then Expected return code is "0" + When Run "crm configure primitive d100 Dummy" on "hanode1" + When Run "crm history refresh force" on "hanode1" + When Try "crm history info|grep "Resources: d100"" + Then Expected return code is "0" + Given Cluster service is "stopped" on "hanode3" + When Run "crm cluster join -c hanode1 -y" on "hanode3" + Then Cluster service is "started" on "hanode3" + When Run "crm history refresh force" on "hanode1" + When Try "crm history info|grep "Nodes: hanode1 hanode2 hanode3"" + Then Expected return code is "0" + When Run "crm script run health" on "hanode1" + When Run "crm script run virtual-ip id=vip_x ip=@vip.0" on "hanode1" + Then Resource "vip_x" type "IPaddr2" is "Started" + + @clean + Scenario: Common tests + When Run "crm report -h" on "hanode1" + + When Try "crm report "*s"" on "hanode1" + Then Expected "*s is invalid file name" in stderr + + When Try "crm report /fsf/report" on "hanode1" + Then Expected "Directory /fsf does not exist" in stderr + + When Try "crm report -n fs" on "hanode1" + Then Expected "host "fs" is unreachable:" in stderr + + When Try "crm report -f xxxx" on "hanode1" + Then Expected "Invalid time string 'xxxx'" in stderr + + When Try "crm report -f 1d -t 2d" on "hanode1" + Then Expected "The start time must be before the finish time" in stderr + + When Run "crm -d report -S -d /tmp/report" on "hanode1" + Then Directory "/tmp/report/hanode1" created + Then Directory "/tmp/report/hanode2" not created + When Run "rm -rf /tmp/report" on "hanode1" + + When Run "crm report -vv" on "hanode1" + Then Default crm_report tar file created + When Remove default crm_report tar file + + When Run "crm report -d /tmp/report" on "hanode1" + Then Directory "/tmp/report" created + When Try "crm report -d /tmp/report" on "hanode1" + Then Expected "Destination directory /tmp/report exists, please cleanup or use -Z option" in stderr + When Run "crm report -d -Z /tmp/report" on "hanode1" + Then Directory "/tmp/report" created + + When Run "mv /etc/corosync/corosync.conf /etc/corosync/corosync.bak" on "hanode1" + When Try "crm report" on "hanode1" + Then Expected "File /etc/corosync/corosync.conf does not exist" in stderr + When Run "mv /etc/corosync/corosync.bak /etc/corosync/corosync.conf" on "hanode1" + + When Run "mv /var/lib/pacemaker/pengine /var/lib/pacemaker/pengine_bak" on "hanode1" + When Try "crm report" on "hanode1" + Then Expected "Cannot find PE directory" in stderr + When Run "mv /var/lib/pacemaker/pengine_bak /var/lib/pacemaker/pengine" on "hanode1" + + When Run "crm cluster stop --all" on "hanode1" + When Run "rm -f /var/lib/pacemaker/cib/cib*" on "hanode1" 
+    When Run "rm -f /var/lib/pacemaker/cib/cib*" on "hanode2"
+    When Try "crm report" on "hanode1"
+    Then Expected "Could not figure out a list of nodes; is this a cluster node" in stderr
diff --git a/test/features/qdevice_setup_remove.feature b/test/features/qdevice_setup_remove.feature
index 77b8abd6bf..df7af3d44e 100644
--- a/test/features/qdevice_setup_remove.feature
+++ b/test/features/qdevice_setup_remove.feature
@@ -25,6 +25,7 @@ Feature: corosync qdevice/qnetd setup/remove process
     And   Service "corosync-qnetd" is "started" on "qnetd-node"
     And   Show status from qnetd
     And   Show corosync qdevice configuration
+    And   Show qdevice status
 
   @clean
   Scenario: Setup qdevice/qnetd on running cluster
diff --git a/test/features/qdevice_usercase.feature b/test/features/qdevice_usercase.feature
index 617beb5bc9..c35d2cb6fb 100644
--- a/test/features/qdevice_usercase.feature
+++ b/test/features/qdevice_usercase.feature
@@ -83,3 +83,5 @@ Feature: Verify usercase master survive when split-brain
     Then Expected "Quorate: No" in stdout
     And Show cluster status on "hanode1"
     And Show cluster status on "hanode2"
+    When Try "crm corosync status fs" on "hanode1"
+    Then Expected "Wrong type "fs" to query status" in stderr
diff --git a/test/features/steps/step_implementation.py b/test/features/steps/step_implementation.py
index 8c278f3b63..9f25625557 100644
--- a/test/features/steps/step_implementation.py
+++ b/test/features/steps/step_implementation.py
@@ -287,6 +287,13 @@ def step_impl(context):
         context.logger.info("\n{}".format(out))
 
 
+@then('Show qdevice status')
+def step_impl(context):
+    _, out, _ = run_command(context, 'crm corosync status qdevice')
+    if out:
+        context.logger.info("\n{}".format(out))
+
+
 @then('Show corosync qdevice configuration')
 def step_impl(context):
     _, out, _ = run_command(context, "sed -n -e '/quorum/,/^}/ p' /etc/corosync/corosync.conf")
@@ -353,15 +360,25 @@ def step_impl(context, votes):
     assert_eq(int(votes), int(corosync.get_value("quorum.expected_votes")))
 
 
+@then('Directory "{directory}" created')
+def step_impl(context, directory):
+    assert os.path.isdir(directory) is True
+
+
+@then('Directory "{directory}" not created')
+def step_impl(context, directory):
+    assert os.path.isdir(directory) is False
+
+
 @then('Default crm_report tar file created')
 def step_impl(context):
-    default_file_name = 'crm_report-{}.tar.bz2'.format(datetime.datetime.now().strftime("%w-%d-%m-%Y"))
+    default_file_name = 'crm_report-{}.tar.bz2'.format(datetime.datetime.now().strftime("%a-%d-%b-%Y"))
     assert os.path.exists(default_file_name) is True
 
 
 @when('Remove default crm_report tar file')
 def step_impl(context):
-    default_file_name = 'crm_report-{}.tar.bz2'.format(datetime.datetime.now().strftime("%w-%d-%m-%Y"))
+    default_file_name = 'crm_report-{}.tar.bz2'.format(datetime.datetime.now().strftime("%a-%d-%b-%Y"))
     os.remove(default_file_name)
 
 
@@ -537,3 +554,14 @@ def step_impl(context, path, nodes):
     for node in nodes:
         rc, _, _ = behave_agent.call(node, 1122, f"systemd-run --uid '{user}' -u ssh-agent /usr/bin/ssh-agent -D -a '{path}'", user='root')
     assert 0 == rc
+
+
+@then('This file "{target_file}" will trigger UnicodeDecodeError exception')
+def step_impl(context, target_file):
+    # behave only fails a step when it raises, so assert instead of returning
+    try:
+        with open(target_file, "r", encoding="utf-8") as file:
+            file.read()
+    except UnicodeDecodeError:
+        pass
+    else:
+        assert False, f"Reading {target_file} as UTF-8 did not raise UnicodeDecodeError"
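+
+
+# Note: behave ignores a step function's return value; a step only fails when
+# it raises an exception, so condition checks like the step above must assert
+# rather than return True/False.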
code coverage threshold to 0.35% --- codecov.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/codecov.yml b/codecov.yml index f11a988584..1c887ed978 100644 --- a/codecov.yml +++ b/codecov.yml @@ -2,10 +2,10 @@ coverage: status: project: default: - threshold: 0.1% + threshold: 0.35% patch: default: - threshold: 0.1% + threshold: 0.35% codecov: token: 16b01c29-3b23-4923-b33a-4d26a49d80c4 notify: From 855073385d9f0b44298c865f23caed1617fc59a7 Mon Sep 17 00:00:00 2001 From: xin liang Date: Fri, 17 Nov 2023 09:59:17 +0800 Subject: [PATCH 08/14] Dev: report: Consolidate utils.read_from_file --- crmsh/report/collect.py | 2 +- crmsh/report/utils.py | 49 ++++++++--------------------------------- crmsh/utils.py | 41 +++++++++++++++++++++++++--------- 3 files changed, 41 insertions(+), 51 deletions(-) diff --git a/crmsh/report/collect.py b/crmsh/report/collect.py index 10cd048b04..8192681052 100644 --- a/crmsh/report/collect.py +++ b/crmsh/report/collect.py @@ -46,7 +46,7 @@ def get_pcmk_log() -> str: ] if os.path.isfile(constants.PCMKCONF): - data = utils.read_from_file(constants.PCMKCONF) + data = crmutils.read_from_file(constants.PCMKCONF) if data: res = re.search(r'^ *PCMK_logfile *= *(.*)', data, re.M) if res: diff --git a/crmsh/report/utils.py b/crmsh/report/utils.py index abf3813bfa..7ca1cdfc85 100644 --- a/crmsh/report/utils.py +++ b/crmsh/report/utils.py @@ -1,11 +1,8 @@ # Copyright (C) 2017 Xin Liang # See COPYING for license information. -import bz2 -import lzma import datetime import glob -import gzip import os import re import shutil @@ -130,7 +127,7 @@ def check_collected_files(context: core.Context) -> List[str]: f_in_work_dir = os.path.join(context.work_dir, node, f) if os.path.isfile(f_in_work_dir) and not crmutils.file_is_empty(f_in_work_dir): results.append(f"{desc} {node}:") - results.append(read_from_file(f_in_work_dir)) + results.append(crmutils.read_from_file(f_in_work_dir)) return results @@ -144,7 +141,7 @@ def extract_critical_log(context: core.Context) -> List[str]: log_pattern_str = '|'.join(log_pattern_list) for f in glob.glob(f"{context.work_dir}/*/*.log"): - _list = re.findall(log_pattern_str, read_from_file(f)) + _list = re.findall(log_pattern_str, crmutils.read_from_file(f)) if _list: result_list.append(f"\nWARNINGS or ERRORS in {'/'.join(f.split('/')[3:])}:") result_list.extend(_list) @@ -203,7 +200,7 @@ def get_distro_info() -> str: res = None if os.path.exists(constants.OSRELEASE): logger.debug2(f"Using {constants.OSRELEASE} to get distribution info") - res = re.search("PRETTY_NAME=\"(.*)\"", read_from_file(constants.OSRELEASE)) + res = re.search("PRETTY_NAME=\"(.*)\"", crmutils.read_from_file(constants.OSRELEASE)) elif shutil.which("lsb_release"): logger.debug2("Using lsb_release to get distribution info") out = sh.LocalShell().get_stdout_or_raise_error("lsb_release -d") @@ -393,7 +390,7 @@ def is_our_log(context: core.Context, logf: str) -> int: Return log type LogType """ - data = read_from_file(logf) + data = crmutils.read_from_file(logf) if not data: return LogType.EMPTY stamp_type = determin_log_format(data) @@ -423,7 +420,7 @@ def create_description_template(context: core.Context) -> None: sysinfo_node_f = os.path.join(context.work_dir, n, constants.SYSINFO_F) if os.path.isfile(sysinfo_node_f): out_string += f"[Info from node {n}]:\n" - out_string += read_from_file(sysinfo_node_f) + out_string += crmutils.read_from_file(sysinfo_node_f) out_string += "\n\n\n\n" description_f = os.path.join(context.work_dir, constants.DESCRIPTION_F) @@ 
-434,7 +431,7 @@ def print_logseg(log_file: str, from_time: float, to_time: float) -> str: """ Print the log segment specified by the given timestamps """ - data = read_from_file(log_file) + data = crmutils.read_from_file(log_file) if not data: return "" @@ -511,7 +508,7 @@ def _load_cib_from_work_dir(self) -> None: cib_file_list = glob.glob(f"{self.context.work_dir}/*/{constants.CIB_F}") if not cib_file_list: raise ReportGenericError(f"CIB file {constants.CIB_F} was not collected") - data = read_from_file(cib_file_list[0]) + data = crmutils.read_from_file(cib_file_list[0]) if not data: raise ReportGenericError(f"File {cib_file_list[0]} is empty") self.cib_data = data @@ -582,7 +579,7 @@ def sanitize(self) -> None: if not self.context.sanitize: return for f in self.file_list_in_workdir: - data = read_from_file(f) + data = crmutils.read_from_file(f) if not data: continue replaced_str = self._sub_sensitive_string(data) @@ -639,36 +636,8 @@ def verify(self) -> str: return getattr(self, f"verify_{self.pkg_type}")() -def get_open_method(infile): - """ - Get the appropriate file open method based on the file extension - """ - file_type_open_dict = { - "gz": gzip.open, - "bz2": bz2.open, - "xz": lzma.open - } - file_ext = infile.split('.')[-1] - return file_type_open_dict.get(file_ext, open) - - -def read_from_file(infile: str) -> str: - """ - Read content from a file - """ - _open = get_open_method(infile) - try: - with _open(infile, 'rt', encoding='utf-8', errors='replace') as f: - data = f.read() - except Exception as err: - logger.error("When reading file \"%s\": %s", infile, str(err)) - return "" - - return crmutils.to_ascii(data) - - def write_to_file(data: str, tofile: str) -> None: - _open = get_open_method(tofile) + _open = crmutils.get_open_method(tofile) with _open(tofile, 'w') as f: if _open == open: f.write(data) diff --git a/crmsh/utils.py b/crmsh/utils.py index 91c99d3855..82d033b986 100644 --- a/crmsh/utils.py +++ b/crmsh/utils.py @@ -22,6 +22,9 @@ import random import string import grp +import gzip +import bz2 +import lzma from pathlib import Path from contextlib import contextmanager, closing from stat import S_ISBLK @@ -2927,16 +2930,6 @@ def diff_and_patch(orig_cib_str, current_cib_str): return True -def read_from_file(infile): - """ - Read data from file in a save way, to avoid UnicodeDecodeError - """ - data = None - with open(infile, 'rt', encoding='utf-8', errors='replace') as f: - data = f.read() - return to_ascii(data) - - def detect_file(_file, remote=None): """ Detect if file exists, support both local and remote @@ -3104,4 +3097,32 @@ def parse_user_at_host(s: str): def file_is_empty(file: str) -> bool: return os.stat(file).st_size == 0 + + +def get_open_method(infile): + """ + Get the appropriate file open method based on the file extension + """ + file_type_open_dict = { + "gz": gzip.open, + "bz2": bz2.open, + "xz": lzma.open + } + file_ext = infile.split('.')[-1] + return file_type_open_dict.get(file_ext, open) + + +def read_from_file(infile: str) -> str: + """ + Read content from a file + """ + _open = get_open_method(infile) + try: + with _open(infile, 'rt', encoding='utf-8', errors='replace') as f: + data = f.read() + except Exception as err: + logger.error("When reading file \"%s\": %s", infile, str(err)) + return "" + + return data # vim:ts=4:sw=4:et: From e8e19c25db619fd96b605cf50c2a2868b6080cb0 Mon Sep 17 00:00:00 2001 From: xin liang Date: Fri, 24 Nov 2023 11:30:05 +0800 Subject: [PATCH 09/14] Dev: behave: Add 'No crmsh tracebacks' assert --- 
test/features/cluster_api.feature | 1 + test/features/crm_report_normal.feature | 1 + test/features/steps/step_implementation.py | 8 ++++++++ 3 files changed, 10 insertions(+) diff --git a/test/features/cluster_api.feature b/test/features/cluster_api.feature index 61140699b5..b8676be080 100644 --- a/test/features/cluster_api.feature +++ b/test/features/cluster_api.feature @@ -135,6 +135,7 @@ Feature: Functional test to cover SAP clusterAPI When Run "su - hacluster -c 'crm status'" on "hanode1" Then Expected "Online: [ hanode1 hanode2 ]" in stdout When Run "su - hacluster -c '/usr/sbin/crm report /tmp/report'" on "hanode1" + Then No crmsh tracebacks Then File "/tmp/report.tar.bz2" exists on "hanode1" And Directory "hanode1" in "/tmp/report.tar.bz2" And Directory "hanode2" in "/tmp/report.tar.bz2" diff --git a/test/features/crm_report_normal.feature b/test/features/crm_report_normal.feature index a7db7f8abe..00a1f2b41c 100644 --- a/test/features/crm_report_normal.feature +++ b/test/features/crm_report_normal.feature @@ -30,6 +30,7 @@ Feature: crm report functional test for common cases When Wait "10" seconds And Run "crm resource untrace d2" on "hanode1" And Run "crm report report" on "hanode1" + Then No crmsh tracebacks Then Directory "trace_ra" in "report.tar.bz2" And Directory "trace_d" in "report.tar.bz2" When Run "rm -rf report.tar.bz2 report" on "hanode1" diff --git a/test/features/steps/step_implementation.py b/test/features/steps/step_implementation.py index 9f25625557..74f0cc8e21 100644 --- a/test/features/steps/step_implementation.py +++ b/test/features/steps/step_implementation.py @@ -122,6 +122,14 @@ def step_impl(context): context.logger.info("\n{}".format(context.stderr)) +@then('No crmsh tracebacks') +def step_impl(context): + if "Traceback (most recent call last):" in context.stderr and \ + re.search('File "/usr/lib/python.*/crmsh/', context.stderr): + context.logger.info("\n{}".format(context.stderr)) + context.failed = True + + @when('Try "{cmd}" on "{addr}"') def step_impl(context, cmd, addr): run_command_local_or_remote(context, cmd, addr, exit_on_fail=False) From 1d336e335533eb2af1f7e8911ba629eb15a0ed18 Mon Sep 17 00:00:00 2001 From: xin liang Date: Fri, 1 Dec 2023 14:31:20 +0800 Subject: [PATCH 10/14] Dev: utils: Rename utils.check_space_option_value to utils.check_empty_option_value --- crmsh/main.py | 2 +- crmsh/report/core.py | 2 +- crmsh/ui_cluster.py | 2 +- crmsh/utils.py | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/crmsh/main.py b/crmsh/main.py index 96fc77d1ba..87229a5ef9 100644 --- a/crmsh/main.py +++ b/crmsh/main.py @@ -301,7 +301,7 @@ def compgen(): def parse_options(): opts, args = option_parser.parse_known_args() - utils.check_space_option_value(opts) + utils.check_empty_option_value(opts) config.core.debug = "yes" if opts.debug else config.core.debug options.profile = opts.profile or options.profile options.regression_tests = opts.regression_tests or options.regression_tests diff --git a/crmsh/report/core.py b/crmsh/report/core.py index 3582ff47ab..85e5db06ae 100644 --- a/crmsh/report/core.py +++ b/crmsh/report/core.py @@ -445,7 +445,7 @@ def parse_arguments(context: Context) -> None: Add, parse and process arguments """ args = add_arguments() - crmutils.check_space_option_value(args) + crmutils.check_empty_option_value(args) for arg in vars(args): value = getattr(args, arg) if value or not hasattr(context, arg): diff --git a/crmsh/ui_cluster.py b/crmsh/ui_cluster.py index 9899577719..fbc37e9a18 100644 --- a/crmsh/ui_cluster.py +++ 
b/crmsh/ui_cluster.py @@ -36,7 +36,7 @@ def parse_options(parser, args): if hasattr(options, 'help') and options.help: parser.print_help() return None, None - utils.check_space_option_value(options) + utils.check_empty_option_value(options) return options, args diff --git a/crmsh/utils.py b/crmsh/utils.py index 82d033b986..2bd787e215 100644 --- a/crmsh/utils.py +++ b/crmsh/utils.py @@ -2208,14 +2208,14 @@ def get_nodeid_from_name(name): return None -def check_space_option_value(options): +def check_empty_option_value(options): if not isinstance(options, argparse.Namespace): raise ValueError("Expected type of \"options\" is \"argparse.Namespace\", not \"{}\"".format(type(options))) for opt in vars(options): value = getattr(options, opt) if isinstance(value, str) and len(value.strip()) == 0: - raise ValueError("Space value not allowed for dest \"{}\"".format(opt)) + raise ValueError("Empty value not allowed for dest \"{}\"".format(opt)) def interface_choice(): From 677727d47adedef7372c64f663f5ce52ab2fd6bf Mon Sep 17 00:00:00 2001 From: xin liang Date: Mon, 4 Dec 2023 11:11:20 +0800 Subject: [PATCH 11/14] Dev: report: Use ast.literal_eval instead of eval To safely evaluate the string representation of a tarball from push_data --- crmsh/report/core.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/crmsh/report/core.py b/crmsh/report/core.py index 85e5db06ae..0901d2fcf9 100644 --- a/crmsh/report/core.py +++ b/crmsh/report/core.py @@ -9,6 +9,7 @@ import sys import shutil import json +import ast from inspect import getmembers, isfunction from typing import List @@ -275,11 +276,19 @@ def start_collector(node: str, context: Context) -> None: # crm report data from collector compress_data = data.lstrip(constants.COMPRESS_DATA_FLAG) else: - # log data from collector + # INFO log data from collector print(data) + try: + # Safely evaluate the string representation of a tarball from push_data + data_object = ast.literal_eval(compress_data) + except (SyntaxError, ValueError) as e: + logger.error(f"Error evaluating data: {e}") + return + + # Extract the tarball in the specified working directory cmd = f"cd {context.work_dir} && tar x" - ShellUtils().get_stdout(cmd, input_s=eval(compress_data)) + ShellUtils().get_stdout(cmd, input_s=data_object) def process_dest(context: Context) -> None: From fc0b56dd232cbded83fc7ae12da65dca6118f817 Mon Sep 17 00:00:00 2001 From: xin liang Date: Mon, 4 Dec 2023 16:39:30 +0800 Subject: [PATCH 12/14] Dev: behave: Complete test case for crm report with ssh-agent To check if remote node's info can be collected --- test/features/ssh_agent.feature | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/features/ssh_agent.feature b/test/features/ssh_agent.feature index 5b959c413c..5c632dd81a 100644 --- a/test/features/ssh_agent.feature +++ b/test/features/ssh_agent.feature @@ -37,7 +37,9 @@ Feature: ssh-agent support And Run "test x3 == x$(sudo awk 'END {print NR}' ~hacluster/.ssh/authorized_keys)" OK on "hanode3" Scenario: crm report - Then Run "SSH_AUTH_SOCK=/tmp/ssh-auth-sock crm report" OK + Then Run "SSH_AUTH_SOCK=/tmp/ssh-auth-sock crm report /tmp/report1" OK on "hanode1" + Then Directory "hanode2" in "/tmp/report1.tar.bz2" + Then Directory "hanode3" in "/tmp/report1.tar.bz2" Scenario: Use qnetd Given Run "crm cluster stop" OK on "hanode1,hanode2,hanode3" From feffabd386af639e9ee73ca8301bb1a16f19018f Mon Sep 17 00:00:00 2001 From: xin liang Date: Tue, 5 Dec 2023 12:14:00 +0800 Subject: [PATCH 13/14] Dev: report: Consider 
ssh agent case in crm report --- crmsh/report/core.py | 2 +- crmsh/utils.py | 9 ++++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/crmsh/report/core.py b/crmsh/report/core.py index 0901d2fcf9..ffc6421345 100644 --- a/crmsh/report/core.py +++ b/crmsh/report/core.py @@ -259,7 +259,7 @@ def start_collector(node: str, context: Context) -> None: else: node = f"{context.ssh_user}@{node}" if context.ssh_user else node cmd = cmd.replace('"', '\\"') - cmd = f'ssh {constants.SSH_OPTS} {node} "{context.sudo} {cmd}"' + cmd = f'{crmutils.get_ssh_agent_str()} ssh {constants.SSH_OPTS} {node} "{context.sudo} {cmd}"' code, out, err = sh.LocalShell().get_rc_stdout_stderr(context.ssh_user, cmd) if code != 0: diff --git a/crmsh/utils.py b/crmsh/utils.py index 2bd787e215..38b5342b29 100644 --- a/crmsh/utils.py +++ b/crmsh/utils.py @@ -2127,11 +2127,18 @@ def check_ssh_passwd_need(local_user, remote_user, host): Check whether access to host need password """ ssh_options = "-o StrictHostKeyChecking=no -o EscapeChar=none -o ConnectTimeout=15" - ssh_cmd = "ssh {} -T -o Batchmode=yes {}@{} true".format(ssh_options, remote_user, host) + ssh_cmd = "{} ssh {} -T -o Batchmode=yes {}@{} true".format(get_ssh_agent_str(), ssh_options, remote_user, host) rc, _ = sh.LocalShell().get_rc_and_error(local_user, ssh_cmd) return rc != 0 +def get_ssh_agent_str(): + ssh_agent_str = "" + if crmsh.user_of_host.instance().use_ssh_agent(): + ssh_agent_str = f"SSH_AUTH_SOCK={os.environ.get('SSH_AUTH_SOCK')}" + return ssh_agent_str + + def check_port_open(ip, port): import socket From cdb94b1eee7b342e7e4ccd2626f764eeb2e3d1ee Mon Sep 17 00:00:00 2001 From: xin liang Date: Fri, 11 Aug 2023 15:21:12 +0800 Subject: [PATCH 14/14] Dev: unittest: Add unit test for crm report module --- data-manifest | 3 + test/unittests/test_corosync.py | 4 - test/unittests/test_report_collect.py | 593 ++++++++++++++++++ test/unittests/test_report_core.py | 552 +++++++++++++++++ test/unittests/test_report_utils.py | 832 ++++++++++++++++++++++++++ test/unittests/test_utils.py | 131 +--- 6 files changed, 1981 insertions(+), 134 deletions(-) create mode 100644 test/unittests/test_report_collect.py create mode 100644 test/unittests/test_report_core.py create mode 100644 test/unittests/test_report_utils.py diff --git a/data-manifest b/data-manifest index b6d65790f5..64a5d8c533 100644 --- a/data-manifest +++ b/data-manifest @@ -202,6 +202,9 @@ test/unittests/test_parse.py test/unittests/test_prun.py test/unittests/test_qdevice.py test/unittests/test_ratrace.py +test/unittests/test_report_collect.py +test/unittests/test_report_core.py +test/unittests/test_report_utils.py test/unittests/test_sbd.py test/unittests/test_scripts.py test/unittests/test_service_manager.py diff --git a/test/unittests/test_corosync.py b/test/unittests/test_corosync.py index 285475c08c..2443f36fb2 100644 --- a/test/unittests/test_corosync.py +++ b/test/unittests/test_corosync.py @@ -308,10 +308,6 @@ def test_parse(self): _valid(p) self.assertEqual(p.get('bananas'), '5') - def test_logfile(self): - self.assertEqual(corosync.logfile(F1), '/var/log/cluster/corosync.log') - self.assertEqual(corosync.logfile('# nothing\n'), None) - def test_udpu(self): p = Parser(F2) _valid(p) diff --git a/test/unittests/test_report_collect.py b/test/unittests/test_report_collect.py new file mode 100644 index 0000000000..a0b93f8460 --- /dev/null +++ b/test/unittests/test_report_collect.py @@ -0,0 +1,593 @@ +from subprocess import TimeoutExpired +from crmsh.report import collect, constants + 
+import unittest +from unittest import mock + + +class TestCollect(unittest.TestCase): + + @mock.patch('logging.Logger.warning') + @mock.patch('os.path.isfile') + def test_get_pcmk_log_no_config(self, mock_isfile, mock_warning): + mock_isfile.side_effect = [False, False, False] + res = collect.get_pcmk_log() + self.assertEqual(res, "") + mock_isfile.assert_has_calls([ + mock.call(constants.PCMKCONF), + mock.call("/var/log/pacemaker/pacemaker.log"), + mock.call("/var/log/pacemaker.log") + ]) + mock_warning.assert_called_once_with("No valid pacemaker log file found") + + @mock.patch('logging.Logger.warning') + @mock.patch('crmsh.utils.read_from_file') + @mock.patch('os.path.isfile') + def test_get_pcmk_log(self, mock_isfile, mock_read, mock_warning): + mock_isfile.return_value = True + mock_read.return_value = """ +# has been enabled, those as well). This log is of more use to developers and +# advanced system administrators, and when reporting problems. +PCMK_logfile=/var/log/pacemaker/pacemaker.log + +# Set the permissions on the above log file to owner/group read/write + """ + res = collect.get_pcmk_log() + self.assertEqual(res, "/var/log/pacemaker/pacemaker.log") + mock_isfile.assert_has_calls([ + mock.call(constants.PCMKCONF), + mock.call("/var/log/pacemaker/pacemaker.log") + ]) + mock_read.assert_called_once_with(constants.PCMKCONF) + + @mock.patch('crmsh.report.utils.dump_logset') + @mock.patch('os.path.isfile') + @mock.patch('crmsh.report.collect.get_pcmk_log') + @mock.patch('crmsh.report.collect.get_corosync_log') + def test_collect_ha_logs(self, mock_corosync_log, mock_get_log, mock_isfile, mock_dump): + mock_corosync_log.return_value = "/var/log/cluster/corosync.log" + mock_get_log.return_value = "/var/pacemaker.log" + mock_isfile.side_effect = [True, True] + mock_ctx_inst = mock.Mock(extra_log_list=[]) + + collect.collect_ha_logs(mock_ctx_inst) + + mock_get_log.assert_called_once_with() + mock_isfile.assert_has_calls([ + mock.call(mock_get_log.return_value), + mock.call(mock_corosync_log.return_value) + ]) + mock_dump.assert_has_calls([ + mock.call(mock_ctx_inst, mock_get_log.return_value), + mock.call(mock_ctx_inst, mock_corosync_log.return_value) + ]) + + @mock.patch('logging.Logger.warning') + @mock.patch('os.path.exists') + @mock.patch('crmsh.corosync.conf') + def test_get_corosync_log_not_exist(self, mock_conf, mock_exists, mock_warning): + mock_conf.return_value = "/etc/corosync/corosync.conf" + mock_exists.return_value = False + self.assertEqual(collect.get_corosync_log(), "") + + @mock.patch('crmsh.corosync.get_value') + @mock.patch('os.path.exists') + @mock.patch('crmsh.corosync.conf') + def test_get_corosync_log(self, mock_conf, mock_exists, mock_get_value): + mock_conf.return_value = "/etc/corosync/corosync.conf" + mock_get_value.return_value = "/var/log/cluster/corosync.log" + mock_exists.return_value = True + self.assertEqual(collect.get_corosync_log(), mock_get_value.return_value) + + @mock.patch('crmsh.report.utils.real_path') + @mock.patch('crmsh.report.collect.logger.debug2') + @mock.patch('crmsh.utils.str2file') + @mock.patch('crmsh.report.utils.get_cmd_output') + @mock.patch('logging.Logger.debug') + @mock.patch('crmsh.report.utils.ts_to_str') + def test_collect_journal_logs(self, mock_ts_to_str, mock_debug, mock_get_cmd_output, + mock_str2file, mock_debug2, mock_real_path): + mock_real_path.side_effect = [ + constants.JOURNAL_F, + constants.JOURNAL_PCMK_F, + constants.JOURNAL_COROSYNC_F, + constants.JOURNAL_SBD_F + ] + mock_ctx_inst = mock.Mock(from_time=1234, 
to_time=5678, work_dir="/opt/work") + mock_ts_to_str.side_effect = ["10.10", "10.12"] + mock_get_cmd_output.side_effect = ["data_default", "data_pacemaker", "data_corosync", "data_sbd"] + collect.collect_journal_logs(mock_ctx_inst) + mock_ts_to_str.assert_has_calls([ + mock.call(mock_ctx_inst.from_time), + mock.call(mock_ctx_inst.to_time) + ]) + cmd_list = [ + 'journalctl -o short-iso-precise --since "10.10" --until "10.12" --no-pager | tail -n +2', + 'journalctl -u pacemaker -o short-iso-precise --since "10.10" --until "10.12" --no-pager | tail -n +2', + 'journalctl -u corosync -o short-iso-precise --since "10.10" --until "10.12" --no-pager | tail -n +2', + 'journalctl -u sbd -o short-iso-precise --since "10.10" --until "10.12" --no-pager | tail -n +2' + ] + mock_get_cmd_output.assert_has_calls([ + mock.call(cmd_list[0]), + mock.call(cmd_list[1]), + mock.call(cmd_list[2]), + mock.call(cmd_list[3]), + ]) + mock_debug2.assert_has_calls([ + mock.call("Collect journal logs since: 10.10 until: 10.12"), + mock.call(f"Running command: {cmd_list[0]}"), + mock.call(f"Running command: {cmd_list[1]}"), + mock.call(f"Running command: {cmd_list[2]}"), + mock.call(f"Running command: {cmd_list[3]}"), + ]) + mock_debug.assert_has_calls([ + mock.call(f"Dump jounal log for default into {constants.JOURNAL_F}"), + mock.call(f"Dump jounal log for pacemaker into {constants.JOURNAL_PCMK_F}"), + mock.call(f"Dump jounal log for corosync into {constants.JOURNAL_COROSYNC_F}"), + mock.call(f"Dump jounal log for sbd into {constants.JOURNAL_SBD_F}") + ]) + + @mock.patch('crmsh.report.collect.ShellUtils') + def test_dump_D_process_empty(self, mock_run): + mock_run_inst = mock.Mock() + mock_run.return_value = mock_run_inst + mock_run_inst.get_stdout_stderr.return_value = (0, None, None) + res = collect.dump_D_process() + self.assertEqual(res, "Dump D-state process stack: 0\n") + + @mock.patch('crmsh.report.collect.ShellUtils') + def test_dump_D_process(self, mock_run): + mock_run_inst = mock.Mock() + mock_run.return_value = mock_run_inst + mock_run_inst.get_stdout_stderr.side_effect = [ + (0, "1000", None), + (0, "data1", None), + (0, "data2", None) + ] + res = collect.dump_D_process() + self.assertEqual(res, "Dump D-state process stack: 1\npid: 1000 comm: data1\ndata2\n\n") + mock_run_inst.get_stdout_stderr.assert_has_calls([ + mock.call("ps aux|awk '$8 ~ /^D/{print $2}'"), + mock.call('cat /proc/1000/comm'), + mock.call('cat /proc/1000/stack') + ]) + + @mock.patch('logging.Logger.debug') + @mock.patch('os.path.exists') + def test_collect_sbd_info_no_config(self, mock_exists, mock_debug): + mock_exists.return_value = False + mock_ctx_inst = mock.Mock() + collect.collect_sbd_info(mock_ctx_inst) + mock_exists.assert_called_once_with(constants.SBDCONF) + mock_debug.assert_called_once_with(f"SBD config file {constants.SBDCONF} does not exist") + + @mock.patch('shutil.which') + @mock.patch('shutil.copy2') + @mock.patch('os.path.exists') + def test_collect_sbd_info_no_cmd(self, mock_exists, mock_copy, mock_which): + mock_exists.return_value = True + mock_which.return_value = False + mock_ctx_inst = mock.Mock(work_dir="/opt") + collect.collect_sbd_info(mock_ctx_inst) + mock_exists.assert_called_once_with(constants.SBDCONF) + mock_copy.assert_called_once_with(constants.SBDCONF, mock_ctx_inst.work_dir) + mock_which.assert_called_once_with("sbd") + + @mock.patch('crmsh.report.utils.real_path') + @mock.patch('builtins.open', create=True) + @mock.patch('logging.Logger.debug') + @mock.patch('crmsh.report.utils.get_cmd_output') + 
@mock.patch('shutil.which') + @mock.patch('shutil.copy2') + @mock.patch('os.path.exists') + def test_collect_sbd_info(self, mock_exists, mock_copy, mock_which, mock_run, mock_debug, mock_open_file, mock_real_path): + mock_real_path.return_value = constants.SBD_F + mock_exists.return_value = True + mock_which.return_value = True + mock_open_write = mock.mock_open() + file_handle = mock_open_write.return_value.__enter__.return_value + mock_open_file.return_value = mock_open_write.return_value + mock_run.return_value = "data" + mock_ctx_inst = mock.Mock(work_dir="/opt") + + collect.collect_sbd_info(mock_ctx_inst) + + mock_exists.assert_called_once_with(constants.SBDCONF) + mock_copy.assert_called_once_with(constants.SBDCONF, mock_ctx_inst.work_dir) + mock_which.assert_called_once_with("sbd") + mock_open_file.assert_called_once_with(f"{mock_ctx_inst.work_dir}/{constants.SBD_F}", "w") + file_handle.write.assert_has_calls([ + mock.call("\n\n#=====[ Command ] ==========================#\n"), + mock.call("# . /etc/sysconfig/sbd;export SBD_DEVICE;sbd dump;sbd list\n"), + mock.call("data") + ]) + mock_debug.assert_called_once_with(f"Dump SBD config file into {constants.SBD_F}") + + @mock.patch('logging.Logger.warning') + @mock.patch('crmsh.report.collect.ShellUtils') + def test_pe_to_dot(self, mock_run, mock_warning): + mock_run_inst = mock.Mock() + mock_run.return_value = mock_run_inst + mock_run_inst.get_stdout_stderr.return_value = (1, None, None) + collect.pe_to_dot("/opt/pe-input-0.bz2") + mock_run_inst.get_stdout_stderr.assert_called_once_with("crm_simulate -D /opt/pe-input-0.dot -x /opt/pe-input-0.bz2") + mock_warning.assert_called_once_with('pe_to_dot: %s -> %s failed', '/opt/pe-input-0.bz2', '/opt/pe-input-0.dot') + + @mock.patch('crmsh.report.utils.find_files_in_timespan') + @mock.patch('crmsh.report.collect.logger.debug2') + def test_collect_pe_inputs_no_found(self, mock_debug, mock_find_files): + mock_ctx_inst = mock.Mock(pe_dir="/opt/pe_dir") + mock_find_files.return_value = [] + collect.collect_pe_inputs(mock_ctx_inst) + mock_find_files.assert_called_once_with(mock_ctx_inst, [mock_ctx_inst.pe_dir]) + mock_debug.assert_has_calls([ + mock.call(f"Looking for PE files in {mock_ctx_inst.pe_dir}"), + mock.call("No PE file found for the giving time") + ]) + + @mock.patch('crmsh.report.utils.real_path') + @mock.patch('crmsh.report.collect.pe_to_dot') + @mock.patch('os.symlink') + @mock.patch('crmsh.utils.mkdirp') + @mock.patch('crmsh.report.utils.find_files_in_timespan') + @mock.patch('crmsh.report.collect.logger.debug2') + @mock.patch('logging.Logger.debug') + def test_collect_pe_inputs(self, mock_debug, mock_debug2, mock_find_files, mock_mkdir, mock_symlink, mock_to_dot, mock_real_path): + mock_real_path.return_value = "pe_dir" + mock_ctx_inst = mock.Mock(pe_dir="/opt/pe_dir", work_dir="/opt/work_dir", speed_up=False) + mock_find_files.return_value = ["/opt/pe_dir/pe_input1", "/opt/pe_dir/pe_input2"] + + collect.collect_pe_inputs(mock_ctx_inst) + + mock_find_files.assert_called_once_with(mock_ctx_inst, [mock_ctx_inst.pe_dir]) + mock_debug2.assert_has_calls([ + mock.call(f"Looking for PE files in {mock_ctx_inst.pe_dir}"), + mock.call(f"Found 2 PE files in {mock_ctx_inst.pe_dir}"), + ]) + mock_debug.assert_called_once_with(f"Dump PE files into pe_dir") + + @mock.patch('crmsh.report.utils.real_path') + @mock.patch('crmsh.report.collect.logger.debug2') + @mock.patch('crmsh.utils.str2file') + @mock.patch('logging.Logger.warning') + @mock.patch('crmsh.report.utils.get_cmd_output') + def 
test_collect_sys_stats(self, mock_run, mock_warning, mock_str2file, mock_debug2, mock_real_path): + mock_real_path.return_value = constants.SYSSTATS_F + mock_run.side_effect = [ + "data_hostname", "data_uptime", "data_ps_axf", "data_ps_auxw", + "data_top", "data_ip_addr", "data_ip_link", "data_ip_show", "data_iscsi", + "data_lspci", "data_mount", "data_cpuinfo", TimeoutExpired("df", 5) + ] + mock_ctx_inst = mock.Mock(work_dir="/opt") + collect.collect_sys_stats(mock_ctx_inst) + mock_warning.assert_called_once_with(f"Timeout while running command: df") + mock_run.assert_has_calls([ + mock.call("hostname", timeout=5), + mock.call("uptime", timeout=5), + mock.call("ps axf", timeout=5), + mock.call("ps auxw", timeout=5), + mock.call("top -b -n 1", timeout=5), + mock.call("ip addr", timeout=5), + mock.call("ip -s link", timeout=5), + mock.call("ip n show", timeout=5), + mock.call("lsscsi", timeout=5), + mock.call("lspci", timeout=5), + mock.call("mount", timeout=5), + mock.call("cat /proc/cpuinfo", timeout=5), + mock.call("df", timeout=5) + ]) + + @mock.patch('crmsh.report.utils.real_path') + @mock.patch('logging.Logger.debug') + @mock.patch('crmsh.report.utils.get_distro_info') + @mock.patch('crmsh.utils.str2file') + @mock.patch('os.uname') + @mock.patch('crmsh.report.utils.Package') + def test_collect_sys_info(self, mock_package, mock_uname, mock_str2file, mock_get_distro, mock_debug, mock_real_path): + mock_real_path.return_value = constants.SYSINFO_F + mock_package_inst = mock.Mock() + mock_package.return_value = mock_package_inst + mock_package_inst.version = mock.Mock(return_value="version_data\n") + mock_package_inst.verify = mock.Mock(return_value="verify_data\n") + mock_ctx_inst = mock.Mock(speed_up=False, work_dir="/opt/work") + mock_uname.return_value = ("Linux", None, "4.5", None, "x86_64") + mock_get_distro.return_value = "suse" + + collect.collect_sys_info(mock_ctx_inst) + + mock_package.assert_called_once_with(constants.PACKAGES) + mock_str2file.assert_called_once_with('##### System info #####\nPlatform: Linux\nKernel release: 4.5\nArchitecture: x86_64\nDistribution: suse\n\n##### Installed cluster related packages #####\nversion_data\n\n\n##### Verification output of packages #####\nverify_data\n', '/opt/work/sysinfo.txt') + mock_debug.assert_called_once_with(f"Dump packages and platform info into {constants.SYSINFO_F}") + + @mock.patch('crmsh.report.utils.real_path') + @mock.patch('crmsh.report.collect.dump_configurations') + @mock.patch('crmsh.report.collect.consume_cib_in_workdir') + @mock.patch('crmsh.report.collect.logger.debug2') + @mock.patch('crmsh.utils.str2file') + @mock.patch('crmsh.report.collect.dump_runtime_state') + @mock.patch('crmsh.report.collect.ServiceManager') + def test_collect_config_running(self, mock_service, mock_dump_state, mock_write, mock_debug2, mock_cib, mock_dump_config, mock_real_path): + mock_real_path.return_value = "workdir" + mock_service_inst = mock.Mock() + mock_service.return_value = mock_service_inst + mock_service_inst.service_is_active.return_value = True + mock_ctx_inst = mock.Mock(work_dir="/opt/workdir") + collect.collect_config(mock_ctx_inst) + + @mock.patch('crmsh.report.utils.real_path') + @mock.patch('crmsh.report.collect.dump_configurations') + @mock.patch('crmsh.report.collect.consume_cib_in_workdir') + @mock.patch('crmsh.report.collect.logger.debug2') + @mock.patch('crmsh.utils.str2file') + @mock.patch('shutil.copy2') + @mock.patch('crmsh.report.collect.ServiceManager') + def test_collect_config_stopped(self, mock_service, 
mock_copy2, mock_write, mock_debug2, mock_cib, mock_dump_config, mock_real_path): + mock_real_path.return_value = "workdir" + mock_service_inst = mock.Mock() + mock_service.return_value = mock_service_inst + mock_service_inst.service_is_active.return_value = False + mock_ctx_inst = mock.Mock(work_dir="/opt/workdir", cib_dir="/var/log/pacemaker/cib") + collect.collect_config(mock_ctx_inst) + + @mock.patch('crmsh.utils.str2file') + @mock.patch('crmsh.report.collect.sh.cluster_shell') + @mock.patch('os.path.isfile') + def test_consume_cib_in_workdir(self, mock_isfile, mock_run, mock_str2file): + mock_isfile.return_value = True + mock_run_inst = mock.Mock() + mock_run.return_value = mock_run_inst + mock_run_inst.get_stdout_or_raise_error.side_effect = ["data1", "data2"] + collect.consume_cib_in_workdir("/workdir") + mock_isfile.assert_called_once_with(f"/workdir/{constants.CIB_F}") + mock_run_inst.get_stdout_or_raise_error.assert_has_calls([ + mock.call('CIB_file=/workdir/cib.xml crm configure show'), + mock.call('crm_verify -V -x /workdir/cib.xml') + ]) + mock_str2file.assert_has_calls([ + mock.call("data1", f"/workdir/{constants.CONFIGURE_SHOW_F}"), + mock.call("data2", f"/workdir/{constants.CRM_VERIFY_F}") + ]) + + @mock.patch('crmsh.report.collect.logger.debug2') + @mock.patch('crmsh.report.collect.sh.cluster_shell') + def test_collect_ratraces_return(self, mock_run, mock_debug): + mock_run_inst = mock.Mock() + mock_run.return_value = mock_run_inst + mock_run_inst.get_rc_stdout_stderr_without_input.return_value = (0, "data", None) + mock_ctx_inst = mock.Mock(node_list=["node1"]) + collect.collect_ratraces(mock_ctx_inst) + mock_debug.assert_not_called() + + @mock.patch('crmsh.report.utils.real_path') + @mock.patch('crmsh.report.collect.logger.debug2') + @mock.patch('shutil.copy2') + @mock.patch('crmsh.utils.mkdirp') + @mock.patch('crmsh.report.utils.find_files_in_timespan') + @mock.patch('logging.Logger.debug') + @mock.patch('crmsh.report.collect.sh.cluster_shell') + def test_collect_ratraces(self, mock_run, mock_debug, mock_find, mock_mkdirp, mock_copy, mock_debug2, mock_real_path): + mock_real_path.return_value = "/var/log" + mock_run_inst = mock.Mock() + mock_run.return_value = mock_run_inst + data = "INFO: Trace for .* is written to /var/log/cluster/pacemaker.log" + mock_run_inst.get_rc_stdout_stderr_without_input.return_value = (0, data, None) + mock_ctx_inst = mock.Mock(node_list=["node1"], work_dir="/opt/work") + mock_find.return_value = ["/var/log/cluster"] + + collect.collect_ratraces(mock_ctx_inst) + + mock_debug2.assert_called_once_with('Looking for RA trace files in "%s"', '/var/log/cluster') + mock_debug.assert_called_once_with(f'Dump RA trace files into {mock_real_path.return_value}') + + @mock.patch('crmsh.report.collect.ShellUtils') + def test_lsof_ocfs2_device(self, mock_run): + mock_run_inst = mock.Mock() + mock_run.return_value = mock_run_inst + mount_data = """ +/dev/vda3 on /home type xfs (rw,relatime,attr2,inode64,logbufs=8,logbsize=32k,noquota) +tmpfs on /run/user/0 type tmpfs (rw,nosuid,nodev,relatime,size=169544k,nr_inodes=42386,mode=700,inode64) +/dev/sda7 on /srv/clusterfs type ocfs2 (rw,relatime,heartbeat=non + """ + mock_run_inst.get_stdout_stderr.side_effect = [(0, mount_data, None), (0, "data", None)] + res = collect.lsof_ocfs2_device() + self.assertEqual(res, "\n\n#=====[ Command ] ==========================#\n# lsof /dev/sda7\ndata") + mock_run_inst.get_stdout_stderr.assert_has_calls([ + mock.call("mount"), + mock.call("lsof /dev/sda7") + ]) + + 
@mock.patch('crmsh.report.utils.get_cmd_output') + @mock.patch('os.path.exists') + @mock.patch('shutil.which') + def test_ocfs2_commands_output(self, mock_which, mock_exists, mock_run): + mock_which.side_effect = [False for i in range(5)] + [True, True] + mock_exists.return_value = False + mock_run.return_value = "data" + res = collect.ocfs2_commands_output() + self.assertEqual(res, "\n\n#===== [ Command ] ==========================#\n# mount\ndata") + + @mock.patch('crmsh.report.collect.logger.debug2') + @mock.patch('crmsh.utils.str2file') + @mock.patch('crmsh.report.collect.ShellUtils') + def test_collect_ocfs2_info_error(self, mock_run, mock_str2file, mock_debug2): + mock_run_inst = mock.Mock() + mock_run.return_value = mock_run_inst + mock_run_inst.get_stdout_stderr.return_value = (1, None, "error") + mock_ctx_inst = mock.Mock(work_dir="/opt/workdir") + collect.collect_ocfs2_info(mock_ctx_inst) + mock_str2file.assert_called_once_with('Failed to run "mounted.ocfs2 -d": error', '/opt/workdir/ocfs2.txt') + + @mock.patch('crmsh.report.collect.logger.debug2') + @mock.patch('crmsh.utils.str2file') + @mock.patch('crmsh.report.collect.ShellUtils') + def test_collect_ocfs2_info_no_found(self, mock_run, mock_str2file, mock_debug2): + mock_run_inst = mock.Mock() + mock_run.return_value = mock_run_inst + mock_run_inst.get_stdout_stderr.return_value = (0, "data", None) + mock_ctx_inst = mock.Mock(work_dir="/opt/workdir") + collect.collect_ocfs2_info(mock_ctx_inst) + mock_str2file.assert_called_once_with('No ocfs2 partitions found', '/opt/workdir/ocfs2.txt') + + @mock.patch('crmsh.report.utils.real_path') + @mock.patch('crmsh.report.collect.ocfs2_commands_output') + @mock.patch('crmsh.report.collect.lsof_ocfs2_device') + @mock.patch('crmsh.report.collect.dump_D_process') + @mock.patch('crmsh.report.collect.logger.debug2') + @mock.patch('crmsh.utils.str2file') + @mock.patch('crmsh.report.collect.ShellUtils') + def test_collect_ocfs2_info(self, mock_run, mock_str2file, mock_debug2, mock_D, mock_lsof, mock_output, mock_real_path): + mock_real_path.return_value = constants.OCFS2_F + mock_run_inst = mock.Mock() + mock_run.return_value = mock_run_inst + mock_run_inst.get_stdout_stderr.return_value = (0, "line1\nline2", None) + mock_D.return_value = "data_D\n" + mock_lsof.return_value = "data_lsof\n" + mock_output.return_value = "data_output\n" + mock_ctx_inst = mock.Mock(work_dir="/opt/workdir") + collect.collect_ocfs2_info(mock_ctx_inst) + mock_str2file.assert_called_once_with('data_D\ndata_lsof\ndata_output\n', '/opt/workdir/ocfs2.txt') + + @mock.patch('crmsh.report.utils.real_path') + @mock.patch('logging.Logger.debug') + @mock.patch('crmsh.utils.str2file') + @mock.patch('crmsh.report.utils.get_cmd_output') + @mock.patch('shutil.which') + def test_collect_dlm_info(self, mock_which, mock_get_output, mock_str2file, mock_debug, mock_real_path): + mock_real_path.return_value = constants.DLM_DUMP_F + mock_which.return_value = True + ls_data = """ +dlm lockspaces +name 08BB5A6A38EE491DBF63627EEB57E558 +id 0x19041a12 + """ + mock_get_output.side_effect = [ls_data, "lockdebug data", "dump data"] + mock_ctx_inst = mock.Mock(work_dir="/opt/work_dir") + collect.collect_dlm_info(mock_ctx_inst) + mock_debug.assert_called_once_with(f"Dump DLM information into {constants.DLM_DUMP_F}") + + @mock.patch('crmsh.report.collect.dump_core_info') + @mock.patch('logging.Logger.warning') + @mock.patch('os.path.basename') + @mock.patch('crmsh.report.utils.find_files_in_timespan') + def test_collect_coredump_info(self, 
mock_find, mock_basename, mock_warning, mock_dump): + mock_ctx_inst = mock.Mock(cores_dir_list=['/var/lib/pacemaker/cores'], work_dir="/opt/work_dir") + mock_find.return_value = ["/var/lib/pacemaker/cores/core.1"] + mock_basename.return_value = "core.1" + collect.collect_coredump_info(mock_ctx_inst) + mock_dump.assert_called_once_with("/opt/work_dir", mock_find.return_value) + mock_warning.assert_called_once_with(f"Found coredump file: {mock_find.return_value}") + + @mock.patch('crmsh.report.collect.ShellUtils') + def test_find_binary_path_for_core_not_found(self, mock_run): + mock_run().get_stdout_stderr.return_value = (0, "Core not found", None) + res = collect.find_binary_path_for_core("core.1") + self.assertEqual("Cannot find the program path for core core.1", res) + + @mock.patch('crmsh.report.collect.ShellUtils') + def test_find_binary_path_for_core(self, mock_run): + mock_run_inst = mock.Mock() + mock_run.return_value = mock_run_inst + mock_run_inst.get_stdout_stderr.return_value = (0, "Core was generated by `/usr/sbin/crm_mon'", None) + res = collect.find_binary_path_for_core("core.1") + self.assertEqual("Core core.1 was generated by /usr/sbin/crm_mon", res) + + @mock.patch('crmsh.report.utils.real_path') + @mock.patch('crmsh.report.collect.logger.debug2') + @mock.patch('crmsh.utils.str2file') + @mock.patch('logging.Logger.warning') + @mock.patch('shutil.which') + def test_dump_core_info_no_gdb(self, mock_which, mock_warning, mock_str2file, mock_debug2, mock_real_path): + mock_real_path.return_value = constants.COREDUMP_F + mock_which.return_value = False + collect.dump_core_info("/opt/workdir", ["core.1"]) + mock_warning.assert_called_once_with("Please install gdb to get more info for coredump files") + mock_debug2(f"Dump coredump info into {constants.COREDUMP_F}") + + @mock.patch('crmsh.report.utils.real_path') + @mock.patch('crmsh.report.collect.logger.debug2') + @mock.patch('crmsh.utils.str2file') + @mock.patch('crmsh.report.collect.find_binary_path_for_core') + @mock.patch('shutil.which') + def test_dump_core_info(self, mock_which, mock_find_binary, mock_str2file, mock_debug2, mock_real_path): + mock_real_path.return_value = constants.COREDUMP_F + mock_which.return_value = True + mock_find_binary.return_value = "data" + collect.dump_core_info("/opt/workdir", ["core.1"]) + mock_str2file.assert_called_once_with("data\n\nPlease utilize the gdb and debuginfo packages to obtain more detailed information locally", f"/opt/workdir/{constants.COREDUMP_F}") + mock_debug2(f"Dump coredump info into {constants.COREDUMP_F}") + + @mock.patch('crmsh.utils.str2file') + @mock.patch('pwd.getpwnam') + @mock.patch('os.stat') + @mock.patch('os.path.isdir') + def test_collect_perms_state(self, mock_isdir, mock_stat, mock_getpwnam, mock_str2file): + mock_ctx_inst = mock.Mock( + pcmk_lib_dir="/var/lib/pacemaker", + pe_dir="/var/lib/pacemaker/pe", + cib_dir="/var/lib/pacemaker/cib", + work_dir="/opt/work_dir" + ) + mock_isdir.side_effect = [False, True, True] + mock_stat_inst_pe = mock.Mock(st_uid=1000, st_gid=1000, st_mode=0o750) + mock_stat_inst_cib = mock.Mock(st_uid=1000, st_gid=1000, st_mode=0o750) + mock_stat.side_effect = [mock_stat_inst_pe, mock_stat_inst_cib] + mock_getpwnam_inst_pe = mock.Mock(pw_uid=1000, pw_gid=1000) + mock_getpwnam_inst_cib = mock.Mock(pw_uid=1001, pw_gid=1000) + mock_getpwnam.side_effect = [mock_getpwnam_inst_pe, mock_getpwnam_inst_cib] + + collect.collect_perms_state(mock_ctx_inst) + + data = "##### Check perms for /var/lib/pacemaker: /var/lib/pacemaker is not a 
directory or does not exist\n##### Check perms for /var/lib/pacemaker/pe: OK\n##### Check perms for /var/lib/pacemaker/cib: Permissions or ownership for /var/lib/pacemaker/cib are incorrect\n" + mock_str2file.assert_called_once_with(data, f"/opt/work_dir/{constants.PERMISSIONS_F}") + + @mock.patch('crmsh.report.utils.real_path') + @mock.patch('crmsh.utils.this_node') + @mock.patch('crmsh.utils.get_dc') + @mock.patch('logging.Logger.debug') + @mock.patch('crmsh.utils.str2file') + @mock.patch('crmsh.report.collect.sh.cluster_shell') + def test_dump_runtime_state(self, mock_run, mock_str2file, mock_debug, mock_get_dc, mock_this_node, mock_real_path): + mock_real_path.side_effect = [ + constants.CRM_MON_F, + constants.CIB_F, + constants.MEMBERSHIP_F, + "workdir" + ] + mock_run_inst = mock.Mock() + mock_run.return_value = mock_run_inst + mock_run_inst.get_stdout_or_raise_error.side_effect = ["crm_mon_data", "cib_data", "crm_node_data"] + mock_get_dc.return_value = "node1" + mock_this_node.return_value = "node1" + collect.dump_runtime_state("/opt/workdir") + mock_debug.assert_has_calls([ + mock.call(f"Dump cluster state into {constants.CRM_MON_F}"), + mock.call(f"Dump CIB contents into {constants.CIB_F}"), + mock.call(f"Dump members of this partition into {constants.MEMBERSHIP_F}"), + mock.call(f"Current DC is node1; Touch file 'DC' in workdir") + ]) + + @mock.patch('shutil.copytree') + @mock.patch('os.path.basename') + @mock.patch('os.path.isdir') + @mock.patch('shutil.copy2') + @mock.patch('os.path.isfile') + @mock.patch('crmsh.corosync.conf') + def test_dump_configurations(self, mock_corosync_conf, mock_isfile, mock_copy2, mock_isdir, mock_basename, mock_copytree): + mock_corosync_conf.return_value = "/etc/corosync/corosync.conf" + mock_isfile.side_effect = [True, True, False, True] + mock_isdir.return_value = True + mock_basename.return_value = "drbd.d" + collect.dump_configurations("/opt/workdir") + + @mock.patch('crmsh.report.utils.real_path') + @mock.patch('logging.Logger.debug') + @mock.patch('crmsh.utils.str2file') + @mock.patch('crmsh.report.utils.get_cmd_output') + @mock.patch('crmsh.report.utils.find_files_in_timespan') + def test_collect_corosync_blackbox(self, mock_find_files, mock_get_cmd_output, mock_str2file, mock_debug, mock_real_path): + mock_real_path.return_value = constants.COROSYNC_RECORDER_F + mock_ctx_inst = mock.Mock(work_dir="/opt/workdir") + mock_find_files.return_value = ["/var/lib/corosync/fdata.1"] + mock_get_cmd_output.return_value = "data" + collect.collect_corosync_blackbox(mock_ctx_inst) + mock_debug.assert_called_once_with(f"Dump corosync blackbox info into {constants.COROSYNC_RECORDER_F}") diff --git a/test/unittests/test_report_core.py b/test/unittests/test_report_core.py new file mode 100644 index 0000000000..ba7cd0374f --- /dev/null +++ b/test/unittests/test_report_core.py @@ -0,0 +1,552 @@ +from crmsh import config +from crmsh.report import core, constants, utils, collect + +import sys +import argparse +import unittest +from unittest import mock + + +class TestCapitalizedHelpFormatter(unittest.TestCase): + def setUp(self): + # Initialize the ArgumentParser with the CapitalizedHelpFormatter + self.parser = argparse.ArgumentParser( + formatter_class=core.CapitalizedHelpFormatter, + usage="usage: test" + ) + self.parser.add_argument('--test', help='Test option') + + def test_usage(self): + # Test that the usage is capitalized + usage_text = self.parser.format_usage() + self.assertTrue(usage_text.startswith('Usage: ')) + + def test_section_heading(self): + # 
Test that section headings are capitalized + section_text = self.parser.format_help() + self.assertTrue('Option' in section_text) + + +class TestContext(unittest.TestCase): + + @mock.patch('crmsh.report.utils.parse_to_timestamp') + @mock.patch('crmsh.report.utils.now') + @mock.patch('crmsh.report.core.config') + def setUp(self, mock_config, mock_now, mock_parse_to_timestamp): + mock_config.report = mock.Mock( + from_time="20230101", + compress=False, + collect_extra_logs="file1 file2", + remove_exist_dest=False, + single_node=False + ) + mock_now.return_value = "12345" + mock_parse_to_timestamp.return_value = "54321" + self.context = core.Context() + self.context.load() + + def test_attribute_setting(self): + self.context.name = "value" + self.assertEqual(self.context.name, "value") + self.context["age"] = 19 + self.assertEqual(self.context.age, 19) + self.context.extra_log_list = ["file3", "file2"] + self.assertEqual(len(self.context.extra_log_list), 3) + + @mock.patch('json.dumps') + def test_str(self, mock_dumps): + mock_dumps.return_value = "json str" + self.assertEqual(self.context.name, "crm_report") + self.assertEqual(self.context.from_time, "54321") + self.assertEqual(str(self.context), "json str") + + +class TestRun(unittest.TestCase): + + @mock.patch('os.path.isdir') + def test_process_dest_dest_not_exist(self, mock_isdir): + mock_isdir.return_value = False + mock_ctx_inst = mock.Mock(dest="/opt/test/report") + with self.assertRaises(utils.ReportGenericError) as err: + core.process_dest(mock_ctx_inst) + self.assertEqual("Directory /opt/test does not exist", str(err.exception)) + + @mock.patch('crmsh.utils.is_filename_sane') + @mock.patch('os.path.basename') + @mock.patch('os.path.isdir') + def test_process_dest_filename_not_sane(self, mock_isdir, mock_basename, mock_sane): + mock_isdir.return_value = True + mock_sane.return_value = False + mock_basename.return_value = "report*" + mock_ctx_inst = mock.Mock(dest="/opt/test/report*") + with self.assertRaises(utils.ReportGenericError) as err: + core.process_dest(mock_ctx_inst) + self.assertEqual("report* is invalid file name", str(err.exception)) + + @mock.patch('crmsh.report.core.pick_compress_prog') + @mock.patch('shutil.rmtree') + @mock.patch('crmsh.utils.is_filename_sane') + @mock.patch('os.path.basename') + @mock.patch('os.path.isdir') + def test_process_dest_dir_exists_rmtree(self, mock_isdir, mock_basename, mock_sane, mock_rmtree, mock_pick): + mock_isdir.side_effect = [True, True] + mock_sane.return_value = True + mock_basename.return_value = "report" + mock_ctx_inst = mock.Mock(dest="/opt/test/report", no_compress=True, rm_exist_dest=True) + core.process_dest(mock_ctx_inst) + mock_rmtree.assert_called_once_with("/opt/test/report") + + @mock.patch('crmsh.report.core.pick_compress_prog') + @mock.patch('crmsh.utils.is_filename_sane') + @mock.patch('os.path.basename') + @mock.patch('os.path.isdir') + def test_process_dest_dir_exists(self, mock_isdir, mock_basename, mock_sane, mock_pick): + mock_isdir.side_effect = [True, True] + mock_sane.return_value = True + mock_basename.return_value = "report" + mock_ctx_inst = mock.Mock(dest="/opt/test/report", no_compress=True, rm_exist_dest=False) + with self.assertRaises(utils.ReportGenericError) as err: + core.process_dest(mock_ctx_inst) + self.assertEqual("Destination directory /opt/test/report exists, please cleanup or use -Z option", str(err.exception)) + + @mock.patch('crmsh.report.core.pick_compress_prog') + @mock.patch('crmsh.utils.is_filename_sane') + 
@mock.patch('os.path.basename') + @mock.patch('os.path.isdir') + @mock.patch('crmsh.report.utils.now') + def test_process_dest(self, mock_now, mock_isdir, mock_basename, mock_is_sane, mock_pick): + mock_now.return_value = "Mon-28-Aug-2023" + mock_isdir.side_effect = [True, False] + mock_is_sane.return_value = True + mock_basename.return_value = f"report.{mock_now.return_value}" + mock_ctx_inst = mock.Mock(dest=None, no_compress=False, compress_suffix=".bz2", name="report") + + core.process_dest(mock_ctx_inst) + + self.assertEqual(mock_ctx_inst.dest_dir, ".") + mock_is_sane.assert_called_once_with(mock_basename.return_value) + self.assertEqual(mock_ctx_inst.dest_path, "./report.Mon-28-Aug-2023.tar.bz2") + + @mock.patch('crmsh.report.core.pick_first_compress') + def test_pick_compress_prog(self, mock_pick): + mock_pick.return_value = (None, None) + mock_ctx_inst = mock.Mock() + core.pick_compress_prog(mock_ctx_inst) + self.assertEqual(mock_ctx_inst.compress_prog, "cat") + + @mock.patch('shutil.which') + def test_pick_first_compress_return(self, mock_which): + mock_which.return_value = True + prog, ext = core.pick_first_compress() + self.assertEqual(prog, "gzip") + self.assertEqual(ext, ".gz") + mock_which.assert_called_once_with("gzip") + + @mock.patch('logging.Logger.warning') + @mock.patch('shutil.which') + def test_pick_first_compress(self, mock_which, mock_warn): + mock_which.side_effect = [False, False, False] + prog, ext = core.pick_first_compress() + self.assertIsNone(prog) + self.assertIsNone(ext) + + @mock.patch('crmsh.report.utils.get_timespan_str') + @mock.patch('logging.Logger.info') + def test_finalword(self, mock_info, mock_get_timespan): + mock_ctx_inst = mock.Mock(dest_path="./crm_report-Tue-15-Aug-2023.tar.bz2", node_list=["node1", "node2"]) + mock_get_timespan.return_value = "2023-08-14 18:17 - 2023-08-15 06:17" + core.finalword(mock_ctx_inst) + mock_info.assert_has_calls([ + mock.call(f"The report is saved in {mock_ctx_inst.dest_path}"), + mock.call(f"Report timespan: {mock_get_timespan.return_value}"), + mock.call(f"Including nodes: {' '.join(mock_ctx_inst.node_list)}"), + mock.call("Thank you for taking time to create this report") + ]) + + @mock.patch('os.path.basename') + @mock.patch('crmsh.report.core.logger.debug2') + @mock.patch('crmsh.utils.mkdirp') + @mock.patch('crmsh.report.core.is_collector') + @mock.patch('crmsh.report.core.tmpfiles.create_dir') + def test_setup_workdir_collector(self, mock_create_dir, mock_collector, mock_mkdirp, mock_debug, mock_basename): + mock_create_dir.return_value = "/tmp/tmp_dir" + mock_ctx_inst = mock.Mock(dest="/opt/report", work_dir="/opt/work_dir", me="node1") + mock_collector.return_value = True + mock_basename.return_value = "report" + core.setup_workdir(mock_ctx_inst) + mock_debug.assert_called_once_with(f"Setup work directory in {mock_ctx_inst.work_dir}") + + @mock.patch('os.path.basename') + @mock.patch('crmsh.report.core.logger.debug2') + @mock.patch('crmsh.utils.mkdirp') + @mock.patch('crmsh.report.core.is_collector') + @mock.patch('crmsh.report.core.tmpfiles.create_dir') + def test_setup_workdir(self, mock_create_dir, mock_collector, mock_mkdirp, mock_debug, mock_basename): + mock_create_dir.return_value = "/tmp/tmp_dir" + mock_ctx_inst = mock.Mock(dest="/opt/report", work_dir="/opt/work_dir") + mock_collector.return_value = False + mock_basename.return_value = "report" + core.setup_workdir(mock_ctx_inst) + mock_debug.assert_called_once_with(f"Setup work directory in {mock_ctx_inst.work_dir}") + + 
@mock.patch('os.path.isdir') + @mock.patch('crmsh.report.core.load_from_crmsh_config') + def test_load_context_attributes(self, mock_load, mock_isdir): + mock_ctx_inst = mock.Mock(cib_dir="/var/lib/pacemaker/cib") + mock_isdir.return_value = True + + core.load_context_attributes(mock_ctx_inst) + + self.assertEqual(mock_ctx_inst.pcmk_lib_dir, "/var/lib/pacemaker") + self.assertEqual(mock_ctx_inst.cores_dir_list, ["/var/lib/pacemaker/cores", constants.COROSYNC_LIB]) + + @mock.patch('os.path.isdir') + @mock.patch('crmsh.report.core.config') + def test_load_from_crmsh_config(self, mock_config, mock_isdir): + mock_config.path = mock.Mock( + crm_config="/var/lib/pacemaker/cib", + crm_daemon_dir="/usr/lib/pacemaker", + pe_state_dir="/var/lib/pacemaker/pe" + ) + mock_isdir.side_effect = [True, True, True] + mock_ctx_inst = mock.Mock() + + core.load_from_crmsh_config(mock_ctx_inst) + + self.assertEqual(mock_ctx_inst.cib_dir, mock_config.path.crm_config) + self.assertEqual(mock_ctx_inst.pcmk_exec_dir, mock_config.path.crm_daemon_dir) + self.assertEqual(mock_ctx_inst.pe_dir, mock_config.path.pe_state_dir) + + @mock.patch('os.path.isdir') + @mock.patch('crmsh.report.core.config') + def test_load_from_crmsh_config_exception(self, mock_config, mock_isdir): + mock_config.path = mock.Mock( + crm_config="/var/lib/pacemaker/cib", + ) + mock_isdir.return_value = False + mock_ctx_inst = mock.Mock() + + with self.assertRaises(utils.ReportGenericError) as err: + core.load_from_crmsh_config(mock_ctx_inst) + self.assertEqual(f"Cannot find CIB directory", str(err.exception)) + + def test_adjust_verbosity_debug(self): + mock_ctx_inst = mock.Mock(debug=1) + core.adjust_verbosity(mock_ctx_inst) + + def test_adjust_verbosity(self): + mock_ctx_inst = mock.Mock(debug=0) + config.core.debug = True + core.adjust_verbosity(mock_ctx_inst) + + @mock.patch('crmsh.report.core.adjust_verbosity') + @mock.patch('crmsh.report.core.config') + @mock.patch('json.loads') + @mock.patch('crmsh.report.core.logger.debug2') + def test_load_context(self, mock_debug2, mock_json_loads, mock_config, mock_verbosity): + class Context: + def __str__(self): + return "data" + def __setitem__(self, key, value): + self.__dict__[key] = value + + sys.argv = ["arg1", "arg2", "arg3"] + mock_config.report = mock.Mock(verbosity=None) + mock_json_loads.return_value = {"key": "value", "debug": "true"} + mock_ctx_inst = Context() + core.load_context(mock_ctx_inst) + mock_debug2.assert_called_once_with("Loading context from collector: data") + + @mock.patch('crmsh.report.core.adjust_verbosity') + @mock.patch('crmsh.report.core.process_arguments') + @mock.patch('crmsh.utils.check_empty_option_value') + @mock.patch('crmsh.report.core.add_arguments') + def test_parse_arguments(self, mock_parse, mock_check_space, mock_process, mock_verbosity): + mock_args = mock.Mock(option1="value1") + mock_parse.return_value = mock_args + mock_ctx_inst = mock.Mock() + + core.parse_arguments(mock_ctx_inst) + self.assertEqual(mock_ctx_inst.option1, "value1") + + mock_check_space.assert_called_once_with(mock_args) + mock_process.assert_called_once_with(mock_ctx_inst) + + def test_is_collector(self): + sys.argv = ["report", "__collector"] + self.assertEqual(core.is_collector(), True) + + @mock.patch('crmsh.report.core.push_data') + @mock.patch('crmsh.report.core.collect_logs_and_info') + @mock.patch('crmsh.report.core.setup_workdir') + @mock.patch('crmsh.report.core.load_context') + @mock.patch('crmsh.report.core.is_collector') + @mock.patch('crmsh.report.core.Context') + def 
test_run_impl_collector(self, mock_context, mock_collector, mock_load, mock_setup, mock_collect_info, mock_push): + mock_context.return_value = mock.Mock() + mock_ctx_inst = mock_context.return_value + mock_collector.side_effect = [True, True] + + core.run_impl() + + mock_context.assert_called_once_with() + mock_collector.assert_has_calls([mock.call(), mock.call()]) + mock_load.assert_called_once_with(mock_ctx_inst) + mock_setup.assert_called_once_with(mock_ctx_inst) + mock_collect_info.assert_called_once_with(mock_ctx_inst) + mock_push.assert_called_once_with(mock_ctx_inst) + + @mock.patch('crmsh.report.core.process_results') + @mock.patch('crmsh.report.core.collect_for_nodes') + @mock.patch('crmsh.report.core.find_ssh_user') + @mock.patch('crmsh.report.core.setup_workdir') + @mock.patch('crmsh.report.core.load_context_attributes') + @mock.patch('crmsh.report.core.parse_arguments') + @mock.patch('crmsh.report.core.is_collector') + @mock.patch('crmsh.report.core.Context') + def test_run_impl(self, mock_context, mock_collector, mock_parse, mock_load, mock_setup, mock_find_ssh, mock_collect, mock_process_results): + mock_context.return_value = mock.Mock() + mock_ctx_inst = mock_context.return_value + mock_collector.side_effect = [False, False] + + core.run_impl() + + mock_context.assert_called_once_with() + mock_collector.assert_has_calls([mock.call(), mock.call()]) + mock_parse.assert_called_once_with(mock_ctx_inst) + mock_load.assert_called_once_with(mock_ctx_inst) + mock_setup.assert_called_once_with(mock_ctx_inst) + mock_find_ssh.assert_called_once_with(mock_ctx_inst) + mock_collect.assert_called_once_with(mock_ctx_inst) + mock_process_results.assert_called_once_with(mock_ctx_inst) + + @mock.patch('logging.Logger.error') + @mock.patch('crmsh.report.core.run_impl') + def test_run_exception_generic(self, mock_run, mock_log_error): + mock_run.side_effect = utils.ReportGenericError("error") + with self.assertRaises(SystemExit) as err: + core.run() + mock_log_error.assert_called_once_with("error") + + @mock.patch('crmsh.report.utils.print_traceback') + @mock.patch('crmsh.report.core.run_impl') + def test_run_exception(self, mock_run, mock_print): + mock_run.side_effect = UnicodeDecodeError("encoding", b'', 0, 1, "error") + with self.assertRaises(SystemExit) as err: + core.run() + mock_print.assert_called_once_with() + + @mock.patch('argparse.HelpFormatter') + @mock.patch('argparse.ArgumentParser') + def test_add_arguments_help(self, mock_argparse, mock_formatter): + mock_argparse_inst = mock.Mock() + mock_argparse.return_value = mock_argparse_inst + mock_args_inst = mock.Mock(help=True) + mock_argparse_inst.parse_args.return_value = mock_args_inst + + with self.assertRaises(SystemExit): + core.add_arguments() + + mock_argparse_inst.print_help.assert_called_once_with() + + @mock.patch('crmsh.report.core.config') + @mock.patch('argparse.HelpFormatter') + @mock.patch('argparse.ArgumentParser') + def test_add_arguments(self, mock_argparse, mock_formatter, mock_config): + mock_argparse_inst = mock.Mock() + mock_argparse.return_value = mock_argparse_inst + mock_args_inst = mock.Mock(help=False, debug=True) + mock_argparse_inst.parse_args.return_value = mock_args_inst + mock_config.report = mock.Mock(verbosity=False) + + core.add_arguments() + + @mock.patch('crmsh.report.core.logger.debug2') + @mock.patch('crmsh.utils.to_ascii') + @mock.patch('crmsh.report.core.ShellUtils') + def test_push_data(self, mock_sh_utils, mock_to_ascii, mock_debug): + mock_sh_utils_inst = mock.Mock() + 
mock_sh_utils.return_value = mock_sh_utils_inst + mock_sh_utils_inst.get_stdout_stderr.return_value = (0, "data", "error") + mock_to_ascii.return_value = "error" + mock_ctx_inst = mock.Mock(work_dir="/opt/work_dir", main_node="node1", me="node1") + + with self.assertRaises(utils.ReportGenericError) as err: + core.push_data(mock_ctx_inst) + self.assertEqual("error", str(err.exception)) + + mock_debug.assert_called_once_with("Pushing data from node1:/opt/work_dir to node1") + mock_sh_utils_inst.get_stdout_stderr.assert_called_once_with("cd /opt/work_dir/.. && tar -h -c node1", raw=True) + + @mock.patch('crmsh.report.core.finalword') + @mock.patch('shutil.move') + @mock.patch('crmsh.report.utils.create_description_template') + @mock.patch('crmsh.report.utils.analyze') + def test_process_results_no_compress(self, mock_analyze, mock_create, mock_move, mock_final): + mock_ctx_inst = mock.Mock(speed_up=True, work_dir="/opt/work_dir", dest_dir="/opt/user", no_compress=True) + core.process_results(mock_ctx_inst) + mock_analyze.assert_called_once_with(mock_ctx_inst) + mock_create.assert_called_once_with(mock_ctx_inst) + mock_final.assert_called_once_with(mock_ctx_inst) + mock_move.assert_called_once_with(mock_ctx_inst.work_dir, mock_ctx_inst.dest_dir) + + @mock.patch('crmsh.report.core.finalword') + @mock.patch('crmsh.report.core.sh.cluster_shell') + @mock.patch('crmsh.report.core.logger.debug2') + @mock.patch('crmsh.report.utils.create_description_template') + @mock.patch('crmsh.report.utils.analyze') + @mock.patch('crmsh.report.utils.do_sanitize') + def test_process_results(self, mock_sanitize, mock_analyze, mock_create, mock_debug2, mock_run, mock_final): + mock_run_inst = mock.Mock() + mock_run.return_value = mock_run_inst + mock_run_inst.get_stdout_or_raise_error = mock.Mock() + mock_ctx_inst = mock.Mock(speed_up=False, work_dir="/opt/work_dir", dest_dir="/opt/user", no_compress=False, dest="report", compress_prog="tar", compress_suffix=".bz2") + core.process_results(mock_ctx_inst) + mock_sanitize.assert_called_once_with(mock_ctx_inst) + mock_analyze.assert_called_once_with(mock_ctx_inst) + mock_create.assert_called_once_with(mock_ctx_inst) + mock_final.assert_called_once_with(mock_ctx_inst) + + @mock.patch('crmsh.report.utils.print_traceback') + @mock.patch('crmsh.report.core.getmembers') + @mock.patch('multiprocessing.cpu_count') + @mock.patch('multiprocessing.Pool') + def test_collect_logs_and_info(self, mock_pool, mock_cpu_count, mock_getmember, mock_print): + mock_cpu_count.return_value = 4 + mock_pool_inst = mock.Mock() + mock_pool.return_value = mock_pool_inst + mock_pool_inst.apply_async = mock.Mock() + mock_async_inst1 = mock.Mock() + mock_async_inst2 = mock.Mock() + mock_pool_inst.apply_async.side_effect = [mock_async_inst1, mock_async_inst2] + mock_async_inst1.get = mock.Mock() + mock_async_inst2.get = mock.Mock(side_effect=ValueError) + mock_pool_inst.close = mock.Mock() + mock_pool_inst.join = mock.Mock() + mock_getmember.return_value = [("collect_func1", None), ("collect_func2", None)] + collect.collect_func1 = mock.Mock() + collect.collect_func2 = mock.Mock() + mock_ctx_inst = mock.Mock() + + core.collect_logs_and_info(mock_ctx_inst) + mock_pool.assert_called_once_with(3) + + @mock.patch('multiprocessing.Process') + @mock.patch('logging.Logger.info') + @mock.patch('crmsh.report.core.start_collector') + def test_collect_for_nodes(self, mock_start_collector, mock_info, mock_process): + mock_ctx_inst = mock.Mock( + node_list=["node1", "node2"], + ssh_askpw_node_list=["node2"], + 
ssh_user="" + ) + mock_process_inst = mock.Mock() + mock_process.return_value = mock_process_inst + core.collect_for_nodes(mock_ctx_inst) + + def test_process_arguments_value_error(self): + mock_ctx_inst = mock.Mock(from_time=123, to_time=100) + with self.assertRaises(ValueError) as err: + core.process_arguments(mock_ctx_inst) + self.assertEqual("The start time must be before the finish time", str(err.exception)) + + @mock.patch('crmsh.utils.list_cluster_nodes') + def test_process_node_list_exception(self, mock_list_nodes): + mock_ctx_inst = mock.Mock(node_list=[]) + mock_list_nodes.return_value = [] + with self.assertRaises(utils.ReportGenericError) as err: + core.process_node_list(mock_ctx_inst) + self.assertEqual("Could not figure out a list of nodes; is this a cluster node?", str(err.exception)) + + @mock.patch('crmsh.utils.list_cluster_nodes') + def test_process_node_list_single(self, mock_list_nodes): + mock_ctx_inst = mock.Mock(node_list=["node1", "node2"], single=True, me="node1") + core.process_node_list(mock_ctx_inst) + + @mock.patch('logging.Logger.error') + @mock.patch('crmsh.utils.ping_node') + @mock.patch('crmsh.utils.list_cluster_nodes') + def test_process_node_list(self, mock_list_nodes, mock_ping, mock_error): + mock_ctx_inst = mock.Mock(node_list=["node1", "node2"], single=False, me="node1") + mock_ping.side_effect = ValueError("error") + core.process_node_list(mock_ctx_inst) + self.assertEqual(mock_ctx_inst.node_list, ["node1"]) + + @mock.patch('crmsh.report.core.process_node_list') + @mock.patch('crmsh.report.core.process_dest') + def test_process_arguments(self, mock_dest, mock_node_list): + mock_ctx_inst = mock.Mock(from_time=123, to_time=150) + core.process_arguments(mock_ctx_inst) + + @mock.patch('crmsh.report.core.logger.debug2') + @mock.patch('logging.Logger.warning') + @mock.patch('logging.Logger.debug') + @mock.patch('crmsh.utils.check_ssh_passwd_need') + @mock.patch('crmsh.report.core.userdir.getuser') + @mock.patch('crmsh.report.core.userdir.get_sudoer') + def test_find_ssh_user_not_found(self, mock_get_sudoer, mock_getuser, mock_check_ssh, mock_debug, mock_warn, mock_debug2): + mock_get_sudoer.return_value = "" + mock_getuser.return_value = "user2" + mock_check_ssh.return_value = True + mock_ctx_inst = mock.Mock(ssh_user="", ssh_askpw_node_list=[], node_list=["node1", "node2"], me="node1") + core.find_ssh_user(mock_ctx_inst) + mock_warn.assert_called_once_with(f"passwordless ssh to node(s) ['node2'] does not work") + + @mock.patch('crmsh.report.core.logger.debug2') + @mock.patch('logging.Logger.warning') + @mock.patch('logging.Logger.debug') + @mock.patch('crmsh.utils.check_ssh_passwd_need') + @mock.patch('crmsh.utils.this_node') + @mock.patch('crmsh.report.core.userdir.getuser') + @mock.patch('crmsh.report.core.userdir.get_sudoer') + def test_find_ssh_user(self, mock_get_sudoer, mock_getuser, mock_this_node, mock_check_ssh, mock_debug, mock_warn, mock_debug2): + mock_get_sudoer.return_value = "user1" + mock_getuser.return_value = "user2" + mock_this_node.return_value = "node1" + mock_check_ssh.return_value = False + mock_ctx_inst = mock.Mock(ssh_user="", ssh_askpw_node_list=[], node_list=["node1", "node2"]) + core.find_ssh_user(mock_ctx_inst) + self.assertEqual("sudo", mock_ctx_inst.sudo) + self.assertEqual("user1", mock_ctx_inst.ssh_user) + + @mock.patch('logging.Logger.warning') + @mock.patch('crmsh.report.core.ShellUtils') + def test_start_collector_return(self, mock_sh_utils, mock_warn): + mock_sh_utils_inst = mock.Mock() + mock_sh_utils.return_value = 
mock_sh_utils_inst + mock_sh_utils_inst.get_stdout_stderr.return_value = (0, '', None) + mock_ctx_inst = mock.Mock(me="node1") + core.start_collector("node1", mock_ctx_inst) + mock_sh_utils_inst.get_stdout_stderr.assert_called_once_with(f"{constants.BIN_COLLECTOR} '{mock_ctx_inst}'") + + @mock.patch('logging.Logger.warning') + @mock.patch('crmsh.report.core.ShellUtils') + @mock.patch('crmsh.report.core.sh.LocalShell') + @mock.patch('crmsh.utils.this_node') + def test_start_collector_warn(self, mock_this_node, mock_sh, mock_sh_utils, mock_warn): + mock_sh_utils_inst = mock.Mock() + mock_sh_utils.return_value = mock_sh_utils_inst + mock_sh_utils_inst.get_stdout = mock.Mock() + mock_sh_inst = mock.Mock() + mock_sh.return_value = mock_sh_inst + mock_sh_inst.get_rc_stdout_stderr.return_value = (1, '', "error") + mock_ctx_inst = mock.Mock(ssh_user='', sudo='') + mock_this_node.return_value = "node2" + core.start_collector("node1", mock_ctx_inst) + mock_warn.assert_called_once_with("error") + + @mock.patch('ast.literal_eval') + @mock.patch('crmsh.report.core.sh.LocalShell') + @mock.patch('crmsh.report.core.ShellUtils') + @mock.patch('crmsh.utils.this_node') + def test_start_collector(self, mock_this_node, mock_sh_utils, mock_sh, mock_eval): + mock_sh_utils_inst = mock.Mock() + mock_sh_utils.return_value = mock_sh_utils_inst + mock_sh_utils_inst.get_stdout = mock.Mock() + mock_sh_inst = mock.Mock() + mock_sh.return_value = mock_sh_inst + mock_sh_inst.get_rc_stdout_stderr.return_value = (0, f"line1\n{constants.COMPRESS_DATA_FLAG}data", None) + mock_ctx_inst = mock.Mock(ssh_user='', sudo='') + mock_this_node.return_value = "node2" + mock_eval.return_value = "data" + core.start_collector("node1", mock_ctx_inst) diff --git a/test/unittests/test_report_utils.py b/test/unittests/test_report_utils.py new file mode 100644 index 0000000000..6a08749603 --- /dev/null +++ b/test/unittests/test_report_utils.py @@ -0,0 +1,832 @@ +import sys +import datetime +from crmsh import config +from crmsh import utils as crmutils +from crmsh.report import utils, constants + +import unittest +from unittest import mock + + +class TestPackage(unittest.TestCase): + + @mock.patch('crmsh.report.utils.get_pkg_mgr') + def setUp(self, mock_get_pkg_mgr): + mock_get_pkg_mgr.side_effect = [None, "rpm", "deb"] + self.inst_none = utils.Package("xxx1 xxx2") + self.inst = utils.Package("rpm1 rpm2") + self.inst_deb = utils.Package("deb1 deb2") + + def test_version_return(self): + res = self.inst_none.version() + self.assertEqual(res, "") + + @mock.patch('crmsh.report.utils.Package.pkg_ver_rpm') + def test_version(self, mock_ver_rpm): + mock_ver_rpm.return_value = "version1" + res = self.inst.version() + self.assertEqual(res, "version1") + + @mock.patch('crmsh.report.utils.ShellUtils') + def test_version_rpm(self, mock_run): + mock_run_inst = mock.Mock() + mock_run.return_value = mock_run_inst + data = "rpm1-4.5.0\nrpm2 not installed" + mock_run_inst.get_stdout_stderr.return_value = (0, data, None) + res = self.inst.pkg_ver_rpm() + self.assertEqual(res, "rpm1-4.5.0") + + @mock.patch('crmsh.report.utils.ShellUtils') + def test_version_deb(self, mock_run): + mock_run_inst = mock.Mock() + mock_run.return_value = mock_run_inst + data = "deb1-4.5.0\nno packages found" + mock_run_inst.get_stdout_stderr.return_value = (0, data, None) + res = self.inst_deb.pkg_ver_deb() + self.assertEqual(res, "deb1-4.5.0") + + def test_verify_return(self): + res = self.inst_none.verify() + self.assertEqual(res, "") + + 
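+    # Note: setUp() builds three Package instances whose detected package
+    # manager is None, "rpm" and "deb" respectively, so the tests in this
+    # class appear to cover the short-circuit path (empty string returned)
+    # as well as the rpm- and deb-specific helpers.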
@mock.patch('crmsh.report.utils.Package.verify_rpm')
+    def test_verify(self, mock_verify_rpm):
+        mock_verify_rpm.return_value = ""
+        res = self.inst.verify()
+        self.assertEqual(res, "")
+
+    @mock.patch('crmsh.report.utils.ShellUtils')
+    def test_verify_rpm(self, mock_run):
+        mock_run_inst = mock.Mock()
+        mock_run.return_value = mock_run_inst
+        mock_run_inst.get_stdout_stderr.return_value = (0, "verify data\nThis is not installed", "")
+        res = self.inst.verify_rpm()
+        self.assertEqual(res, "verify data")
+
+    @mock.patch('crmsh.report.utils.ShellUtils')
+    def test_verify_deb(self, mock_run):
+        mock_run_inst = mock.Mock()
+        mock_run.return_value = mock_run_inst
+        mock_run_inst.get_stdout_stderr.return_value = (0, "verify data\nThis is not installed", "")
+        res = self.inst_deb.verify_deb()
+        self.assertEqual(res, "verify data")
+
+
+class TestSanitizer(unittest.TestCase):
+
+    def setUp(self):
+        mock_ctx_inst_no_sanitize = mock.Mock(sanitize=False)
+        self.s_inst_no_sanitize = utils.Sanitizer(mock_ctx_inst_no_sanitize)
+
+        mock_ctx_inst_no_sanitize_set = mock.Mock(sensitive_regex_list=[])
+        self.s_inst_no_sanitize_set = utils.Sanitizer(mock_ctx_inst_no_sanitize_set)
+
+        mock_ctx_inst = mock.Mock(sanitize=True, work_dir="/opt", sensitive_regex_list=["test_patt"])
+        self.s_inst = utils.Sanitizer(mock_ctx_inst)
+
+    @mock.patch('logging.Logger.warning')
+    @mock.patch('crmsh.report.utils.Sanitizer._include_sensitive_data')
+    @mock.patch('crmsh.report.utils.Sanitizer._extract_sensitive_value_list')
+    @mock.patch('crmsh.report.utils.Sanitizer._parse_sensitive_set')
+    @mock.patch('crmsh.report.utils.Sanitizer._load_cib_from_work_dir')
+    def test_prepare_return(self, mock_load_cib, mock_parse, mock_extract, mock_include, mock_warning):
+        mock_include.return_value = True
+        self.s_inst_no_sanitize.prepare()
+        mock_load_cib.assert_called_once_with()
+        mock_parse.assert_called_once_with()
+        mock_warning.assert_has_calls([
+            mock.call("Some PE/CIB/log files contain possibly sensitive data"),
+            mock.call("Using \"-s\" option can replace sensitive data")
+        ])
+
+    @mock.patch('crmsh.report.utils.Sanitizer._get_file_list_in_work_dir')
+    @mock.patch('crmsh.report.utils.Sanitizer._include_sensitive_data')
+    @mock.patch('crmsh.report.utils.Sanitizer._extract_sensitive_value_list')
+    @mock.patch('crmsh.report.utils.Sanitizer._parse_sensitive_set')
+    @mock.patch('crmsh.report.utils.Sanitizer._load_cib_from_work_dir')
+    def test_prepare(self, mock_load_cib, mock_parse, mock_extract, mock_include, mock_get_file):
+        mock_include.return_value = True
+        self.s_inst.prepare()
+        mock_load_cib.assert_called_once_with()
+        mock_parse.assert_called_once_with()
+        mock_get_file.assert_called_once_with()
+
+    @mock.patch('crmsh.report.utils.Sanitizer._include_sensitive_data')
+    @mock.patch('crmsh.report.utils.Sanitizer._extract_sensitive_value_list')
+    @mock.patch('crmsh.report.utils.Sanitizer._parse_sensitive_set')
+    @mock.patch('crmsh.report.utils.Sanitizer._load_cib_from_work_dir')
+    def test_prepare_no_sensitive_data(self, mock_load_cib, mock_parse, mock_extract, mock_include):
+        mock_include.return_value = False
+        self.s_inst.prepare()
+        mock_load_cib.assert_called_once_with()
+        mock_parse.assert_called_once_with()
+
+    def test_include_sensitive_data(self):
+        res = self.s_inst._include_sensitive_data()
+        self.assertEqual(res, [])
+
+    @mock.patch('os.walk')
+    def test_get_file_list_in_work_dir(self, mock_walk):
+        mock_walk.return_value = [
+            ("/opt", [], ["file1", "file2"]),
+            ("/opt/dir1", [], ["file3"]),
+        ]
+
self.s_inst._get_file_list_in_work_dir()
+        self.assertEqual(self.s_inst.file_list_in_workdir, ['/opt/file1', '/opt/file2', '/opt/dir1/file3'])
+
+    @mock.patch('glob.glob')
+    def test_load_cib_from_work_dir_no_cib(self, mock_glob):
+        mock_glob.return_value = []
+        with self.assertRaises(utils.ReportGenericError) as err:
+            self.s_inst._load_cib_from_work_dir()
+        self.assertEqual(f"CIB file {constants.CIB_F} was not collected", str(err.exception))
+
+    @mock.patch('glob.glob')
+    @mock.patch('crmsh.utils.read_from_file')
+    def test_load_cib_from_work_dir_empty(self, mock_read, mock_glob):
+        mock_glob.return_value = [f"/opt/node1/{constants.CIB_F}"]
+        mock_read.return_value = None
+        with self.assertRaises(utils.ReportGenericError) as err:
+            self.s_inst._load_cib_from_work_dir()
+        self.assertEqual(f"File /opt/node1/{constants.CIB_F} is empty", str(err.exception))
+        mock_read.assert_called_once_with(f"/opt/node1/{constants.CIB_F}")
+
+    @mock.patch('glob.glob')
+    @mock.patch('crmsh.utils.read_from_file')
+    def test_load_cib_from_work_dir(self, mock_read, mock_glob):
+        mock_glob.return_value = [f"/opt/node1/{constants.CIB_F}"]
+        mock_read.return_value = "data"
+        self.s_inst._load_cib_from_work_dir()
+        self.assertEqual(self.s_inst.cib_data, "data")
+        mock_read.assert_called_once_with(f"/opt/node1/{constants.CIB_F}")
+
+    @mock.patch('crmsh.report.utils.logger.debug2')
+    def test_parse_sensitive_set_no_set(self, mock_debug2):
+        config.report.sanitize_rule = ""
+        self.s_inst_no_sanitize_set._parse_sensitive_set()
+        self.assertEqual(self.s_inst_no_sanitize_set.sensitive_regex_set, set(utils.Sanitizer.DEFAULT_RULE_LIST))
+        mock_debug2.assert_called_once_with(f"Regex set to match sensitive data: {set(utils.Sanitizer.DEFAULT_RULE_LIST)}")
+
+    @mock.patch('crmsh.report.utils.logger.debug2')
+    def test_parse_sensitive_set(self, mock_debug2):
+        config.report.sanitize_rule = "passw.*"
+        self.s_inst._parse_sensitive_set()
+        self.assertEqual(self.s_inst.sensitive_regex_set, set(['test_patt', 'passw.*']))
+        mock_debug2.assert_called_once_with(f"Regex set to match sensitive data: {set(['test_patt', 'passw.*'])}")
+
+    def test_sanitize_return(self):
+        self.s_inst_no_sanitize.sanitize()
+
+    @mock.patch('crmsh.report.utils.write_to_file')
+    @mock.patch('logging.Logger.debug')
+    @mock.patch('crmsh.report.utils.Sanitizer._sub_sensitive_string')
+    @mock.patch('crmsh.utils.read_from_file')
+    def test_sanitize(self, mock_read, mock_sub, mock_debug, mock_write):
+        self.s_inst.file_list_in_workdir = ["file1", "file2"]
+        mock_read.side_effect = [None, "data"]
+        mock_sub.return_value = "replace_data"
+        self.s_inst.sanitize()
+        mock_debug.assert_called_once_with("Replace sensitive info for %s", "file2")
+
+    def test_extract_from_cib(self):
+        self.s_inst.cib_data = """
+        <nvpair id="nodes-1-password" name="password" value="qwertyui"/>
+        <nvpair id="nodes-1-ip" name="ip" value="10.10.10.1"/>
+        <nvpair id="nodes-1-user" name="user" value="hacluster"/>
+        """
+        res = self.s_inst._extract_from_cib("passw.*")
+        self.assertEqual(res, ["qwertyui"])
+
+    def test_sub_sensitive_string(self):
+        data = """
+        <nvpair id="nodes-1-password" name="password" value="qwertyui"/>
+        <nvpair id="nodes-2-password" name="password" value="qwertyui"/>
+        <nvpair id="nodes-1-ip" name="ip" value="10.10.10.1"/>
+        <nvpair id="nodes-1-user" name="user" value="hacluster"/>
+        This my tel 13356789876
+        """
+        self.s_inst.sensitive_value_list_with_raw_option = ["13356789876"]
+        self.s_inst.sensitive_key_list = ["passw.*"]
+        self.s_inst.sensitive_value_list = ["qwertyui"]
+        res = self.s_inst._sub_sensitive_string(data)
+        expected_data = """
+        <nvpair id="nodes-1-password" name="password" value="******"/>
+        <nvpair id="nodes-2-password" name="password" value="******"/>
+        <nvpair id="nodes-1-ip" name="ip" value="10.10.10.1"/>
+        <nvpair id="nodes-1-user" name="user" value="hacluster"/>
+        This my tel ******
+        """
+        self.assertEqual(res, expected_data)
+
+    @mock.patch('logging.Logger.warning')
+    def test_extract_sensitive_value_list_warn(self, mock_warn):
+        self.s_inst.sensitive_regex_set = set(["TEL:test"])
+        self.s_inst._extract_sensitive_value_list()
+        mock_warn.assert_called_once_with("For sanitize pattern TEL:test, 
option should be \"raw\"") + + @mock.patch('crmsh.report.utils.Sanitizer._extract_from_cib') + def test_extract_sensitive_value_list(self, mock_extract): + mock_extract.side_effect = [["123456"], ["qwertyui"]] + self.s_inst.sensitive_regex_set = set(["TEL:raw", "passw.*"]) + self.s_inst._extract_sensitive_value_list() + +class TestUtils(unittest.TestCase): + + @mock.patch('builtins.sorted', side_effect=lambda x, *args, **kwargs: x[::-1]) + @mock.patch('crmsh.report.utils.get_timespan_str') + @mock.patch('crmsh.report.utils.logger.debug2') + @mock.patch('glob.glob') + @mock.patch('crmsh.report.utils.is_our_log') + def test_arch_logs(self, mock_is_our_log, mock_glob, mock_debug2, mock_timespan, mock_sorted): + mock_is_our_log.return_value = utils.LogType.GOOD + mock_glob.return_value = [] + mock_ctx_inst = mock.Mock() + mock_timespan.return_value = "0101-0202" + + return_list, log_type = utils.arch_logs(mock_ctx_inst, "file1") + + self.assertEqual(return_list, ["file1"]) + self.assertEqual(log_type, utils.LogType.GOOD) + mock_debug2.assert_called_once_with("Found logs ['file1'] in 0101-0202") + + @mock.patch('sys.stdout.flush') + @mock.patch('traceback.print_exc') + def test_print_traceback(self, mock_trace, mock_flush): + utils.print_traceback() + mock_trace.assert_called_once_with() + + @mock.patch('crmsh.report.utils.ts_to_str') + def test_get_timespan_str(self, mock_ts_to_str): + mock_ctx_inst = mock.Mock(from_time=1691938980.0, to_time=1691982180.0) + mock_ts_to_str.side_effect = ["2023-08-13 23:03", "2023-08-14 11:03"] + res = utils.get_timespan_str(mock_ctx_inst) + self.assertEqual(res, "2023-08-13 23:03 - 2023-08-14 11:03") + mock_ts_to_str.assert_has_calls([ + mock.call(mock_ctx_inst.from_time), + mock.call(mock_ctx_inst.to_time) + ]) + + @mock.patch('crmsh.report.utils.ShellUtils') + def test_get_cmd_output(self, mock_run): + mock_run_inst = mock.Mock() + mock_run.return_value = mock_run_inst + mock_run_inst.get_stdout_stderr.return_value = (0, "stdout_data", "stderr_data") + res = utils.get_cmd_output("cmd") + self.assertEqual(res, "stdout_data\nstderr_data\n") + mock_run_inst.get_stdout_stderr.assert_called_once_with("cmd", timeout=None) + + @mock.patch('crmsh.utils.read_from_file') + def test_is_our_log_empty(self, mock_read): + mock_read.return_value = None + mock_ctx_inst = mock.Mock() + res = utils.is_our_log(mock_ctx_inst, "/opt/logfile") + self.assertEqual(res, utils.LogType.EMPTY) + mock_read.assert_called_once_with("/opt/logfile") + + @mock.patch('crmsh.report.utils.determin_log_format') + @mock.patch('crmsh.utils.read_from_file') + def test_is_our_log_irregular(self, mock_read, mock_log_format): + mock_read.return_value = "This is the log" + mock_ctx_inst = mock.Mock() + mock_log_format.return_value = None + res = utils.is_our_log(mock_ctx_inst, "/opt/logfile") + self.assertEqual(res, utils.LogType.IRREGULAR) + mock_read.assert_called_once_with("/opt/logfile") + mock_log_format.assert_called_once_with(mock_read.return_value) + + @mock.patch('crmsh.report.utils.find_first_timestamp') + @mock.patch('crmsh.report.utils.head') + @mock.patch('crmsh.report.utils.determin_log_format') + @mock.patch('crmsh.utils.read_from_file') + def test_is_our_log_before(self, mock_read, mock_determine, mock_head, mock_find_first): + mock_read.return_value = "data" + mock_determine.return_value = "rfc5424" + mock_find_first.side_effect = [1000, 1500] + mock_ctx_inst = mock.Mock(from_time=1600, to_time=1800) + res = utils.is_our_log(mock_ctx_inst, "/var/log/pacemaker.log") + 
self.assertEqual(res, utils.LogType.BEFORE_TIMESPAN) + + @mock.patch('crmsh.report.utils.find_first_timestamp') + @mock.patch('crmsh.report.utils.head') + @mock.patch('crmsh.report.utils.determin_log_format') + @mock.patch('crmsh.utils.read_from_file') + def test_is_our_log_good(self, mock_read, mock_determine, mock_head, mock_find_first): + mock_read.return_value = "data" + mock_determine.return_value = "rfc5424" + mock_find_first.side_effect = [1000, 1500] + mock_ctx_inst = mock.Mock(from_time=1200, to_time=1800) + res = utils.is_our_log(mock_ctx_inst, "/var/log/pacemaker.log") + self.assertEqual(res, utils.LogType.GOOD) + + @mock.patch('crmsh.report.utils.find_first_timestamp') + @mock.patch('crmsh.report.utils.head') + @mock.patch('crmsh.report.utils.determin_log_format') + @mock.patch('crmsh.utils.read_from_file') + def test_is_our_log_after(self, mock_read, mock_determine, mock_head, mock_find_first): + mock_read.return_value = "data" + mock_determine.return_value = "rfc5424" + mock_find_first.side_effect = [1000, 1500] + mock_ctx_inst = mock.Mock(from_time=200, to_time=800) + res = utils.is_our_log(mock_ctx_inst, "/var/log/pacemaker.log") + self.assertEqual(res, utils.LogType.AFTER_TIMESPAN) + + @mock.patch('logging.Logger.warning') + @mock.patch('shutil.which') + def test_get_pkg_mgr_unknown(self, mock_which, mock_warning): + mock_which.side_effect = [False, False] + self.assertEqual(utils.get_pkg_mgr(), "") + mock_warning.assert_called_once_with("Unknown package manager!") + + @mock.patch('shutil.which') + def test_get_pkg_mgr(self, mock_which): + mock_which.return_value = True + utils.get_pkg_mgr() + self.assertEqual(utils.get_pkg_mgr(), "rpm") + + @mock.patch('os.walk') + @mock.patch('os.stat') + @mock.patch('os.path.isdir') + def test_find_files_in_timespan(self, mock_isdir, mock_stat, mock_walk): + mock_isdir.side_effect = [True, False] + mock_stat.return_value = mock.Mock(st_ctime=1615) + mock_walk.return_value = [ + ('/mock_dir', [], ['file1.txt', 'file2.txt']) + ] + mock_ctx_inst = mock.Mock(from_time=1611, to_time=1620) + + res = utils.find_files_in_timespan(mock_ctx_inst, ['/mock_dir', '/not_exist']) + + expected_result = ['/mock_dir/file1.txt', '/mock_dir/file2.txt'] + self.assertEqual(res, expected_result) + + @mock.patch('crmsh.report.utils.get_timespan_str') + @mock.patch('crmsh.report.utils.logger.debug2') + @mock.patch('crmsh.report.utils.arch_logs') + def test_dump_logset_return(self, mock_arch, mock_debug, mock_timespan): + mock_arch.return_value = [[], ""] + mock_ctx_inst = mock.Mock() + utils.dump_logset(mock_ctx_inst, "file") + + @mock.patch('crmsh.report.utils.real_path') + @mock.patch('logging.Logger.debug') + @mock.patch('crmsh.utils.str2file') + @mock.patch('os.path.basename') + @mock.patch('crmsh.report.utils.print_logseg') + @mock.patch('crmsh.report.utils.arch_logs') + def test_dump_logset_irrgular(self, mock_arch, mock_print, mock_basename, mock_str2file, mock_debug, mock_real_path): + mock_real_path.return_value = "file1" + mock_arch.return_value = [["file1"], utils.LogType.IRREGULAR] + mock_ctx_inst = mock.Mock(work_dir="/opt/work_dir") + mock_basename.return_value = "file1" + mock_print.return_value = "data" + utils.dump_logset(mock_ctx_inst, "file1") + mock_print.assert_called_once_with("file1", 0, 0) + mock_str2file.assert_called_once_with("data", "/opt/work_dir/file1") + mock_debug.assert_called_once_with("Dump file1 into file1") + + @mock.patch('crmsh.report.utils.real_path') + @mock.patch('logging.Logger.debug') + 
@mock.patch('crmsh.utils.str2file') + @mock.patch('os.path.basename') + @mock.patch('crmsh.report.utils.print_logseg') + @mock.patch('crmsh.report.utils.arch_logs') + def test_dump_logset_one(self, mock_arch, mock_print, mock_basename, mock_str2file, mock_debug, mock_real_path): + mock_real_path.return_value = "file1" + mock_arch.return_value = [["file1"], utils.LogType.GOOD] + mock_ctx_inst = mock.Mock(work_dir="/opt/work_dir", from_time=10, to_time=20) + mock_basename.return_value = "file1" + mock_print.return_value = "data" + + utils.dump_logset(mock_ctx_inst, "file1") + + mock_print.assert_called_once_with("file1", 10, 20) + mock_str2file.assert_called_once_with("data", "/opt/work_dir/file1") + mock_debug.assert_called_once_with("Dump file1 into file1") + + @mock.patch('crmsh.report.utils.real_path') + @mock.patch('logging.Logger.debug') + @mock.patch('crmsh.utils.str2file') + @mock.patch('os.path.basename') + @mock.patch('crmsh.report.utils.print_logseg') + @mock.patch('crmsh.report.utils.arch_logs') + def test_dump_logset(self, mock_arch, mock_print, mock_basename, mock_str2file, mock_debug, mock_real_path): + mock_real_path.return_value = "file1" + mock_arch.return_value = [["file1", "file2", "file3"], utils.LogType.GOOD] + mock_ctx_inst = mock.Mock(work_dir="/opt/work_dir", from_time=10, to_time=20) + mock_basename.return_value = "file1" + mock_print.side_effect = ["data1\n", "data2\n", "data3\n"] + + utils.dump_logset(mock_ctx_inst, "file1") + + mock_print.assert_has_calls([ + mock.call("file3", 10, 0), + mock.call("file2", 0, 0), + mock.call("file1", 0, 20), + ]) + mock_str2file.assert_called_once_with("data1\ndata2\ndata3", "/opt/work_dir/file1") + mock_debug.assert_called_once_with("Dump file1 into file1") + + @mock.patch('crmsh.utils.read_from_file') + @mock.patch('os.path.exists') + @mock.patch('crmsh.report.utils.logger.debug2') + def test_get_distro_info(self, mock_debug2, mock_exists, mock_read): + mock_exists.return_value = True + mock_read.return_value = """ +VERSION_ID="20230629" +PRETTY_NAME="openSUSE Tumbleweed" +ANSI_COLOR="0;32" + """ + res = utils.get_distro_info() + self.assertEqual(res, "openSUSE Tumbleweed") + + @mock.patch('shutil.which') + @mock.patch('crmsh.report.utils.sh.LocalShell') + @mock.patch('os.path.exists') + @mock.patch('crmsh.report.utils.logger.debug2') + def test_get_distro_info_lsb(self, mock_debug2, mock_exists, mock_sh, mock_which): + mock_which.return_value = True + mock_exists.return_value = False + mock_sh_inst = mock.Mock() + mock_sh.return_value = mock_sh_inst + mock_sh_inst.get_stdout_or_raise_error.return_value = "data" + res = utils.get_distro_info() + self.assertEqual(res, "Unknown") + + @mock.patch('crmsh.report.utils.get_timestamp') + def test_find_first_timestamp_none(self, mock_get_timestamp): + mock_get_timestamp.side_effect = [None, None] + data = ["line1", "line2"] + self.assertIsNone(utils.find_first_timestamp(data)) + mock_get_timestamp.assert_has_calls([ + mock.call("line1"), + mock.call("line2") + ]) + + @mock.patch('crmsh.report.utils.get_timestamp') + def test_find_first_timestamp(self, mock_get_timestamp): + mock_get_timestamp.return_value = 123456 + data = ["line1", "line2"] + res = utils.find_first_timestamp(data) + self.assertEqual(res, 123456) + mock_get_timestamp.assert_called_once_with("line1") + + def test_filter_lines(self): + data = """line1 +line2 +line3 +line4 +line5 + """ + res = utils.filter_lines(data, 2, 4) + self.assertEqual(res, 'line2\nline3\nline4\n') + + @mock.patch('crmsh.utils.parse_time') + 
@mock.patch('crmsh.report.utils.head') + def test_determin_log_format_none(self, mock_head, mock_parse): + mock_head.return_value = ["line1", "line2"] + mock_parse.side_effect = [None, None] + data = """line1 +line2 + """ + self.assertIsNone(utils.determin_log_format(data)) + + def test_determin_log_format_rfc5424(self): + data = """ +2003-10-11T22:14:15.003Z mymachine.example.com su + """ + res = utils.determin_log_format(data) + self.assertEqual(res, "rfc5424") + + def test_determin_log_format_syslog(self): + data = """ +Feb 12 18:30:08 15sp1-1 kernel: + """ + res = utils.determin_log_format(data) + self.assertEqual(res, "syslog") + + @mock.patch('crmsh.utils.parse_time') + @mock.patch('crmsh.report.utils.head') + def test_determin_log_format_legacy(self, mock_head, mock_parse): + mock_head.return_value = ["Legacy 2003-10-11T22:14:15.003Z log"] + mock_parse.side_effect = [None, None, 123456] + data = """ +Legacy 003-10-11T22:14:15.003Z log data log + """ + res = utils.determin_log_format(data) + self.assertEqual(res, "legacy") + mock_parse.assert_has_calls([ + mock.call("Legacy 2003-10-11T22:14:15.003Z log", quiet=True), + mock.call("Legacy", quiet=True), + mock.call("2003-10-11T22:14:15.003Z", quiet=True) + ]) + + def test_get_timestamp_none(self): + self.assertIsNone(utils.get_timestamp("")) + + @mock.patch('crmsh.utils.parse_to_timestamp') + def test_get_timespan_rfc5424(self, mock_parse): + constants.STAMP_TYPE = "rfc5424" + mock_parse.return_value = 12345 + res = utils.get_timestamp("2003-10-11T22:14:15.003Z mymachine.example.com su") + self.assertEqual(res, mock_parse.return_value) + mock_parse.assert_called_once_with("2003-10-11T22:14:15.003Z", quiet=True) + + @mock.patch('crmsh.utils.parse_to_timestamp') + def test_get_timespan_syslog(self, mock_parse): + constants.STAMP_TYPE = "syslog" + mock_parse.return_value = 12345 + res = utils.get_timestamp("Feb 12 18:30:08 15sp1-1 kernel:") + self.assertEqual(res, mock_parse.return_value) + mock_parse.assert_called_once_with("Feb 12 18:30:08", quiet=True) + + @mock.patch('crmsh.utils.parse_to_timestamp') + def test_get_timespan_legacy(self, mock_parse): + constants.STAMP_TYPE = "legacy" + mock_parse.return_value = 12345 + res = utils.get_timestamp("legacy 2003-10-11T22:14:15.003Z log data") + self.assertEqual(res, mock_parse.return_value) + mock_parse.assert_called_once_with("2003-10-11T22:14:15.003Z", quiet=True) + + @mock.patch('crmsh.report.utils.diff_check') + def test_do_compare(self, mock_diff): + mock_ctx_inst = mock.Mock(work_dir="/opt/workdir", node_list=["node1", "node2"]) + mock_diff.side_effect = [[0, ""], [0, ""]] + rc, out = utils.do_compare(mock_ctx_inst, "file1") + self.assertEqual(rc, 0) + self.assertEqual(out, "") + mock_diff.assert_called_once_with("/opt/workdir/node1/file1", "/opt/workdir/node2/file1") + + @mock.patch('os.path.isfile') + def test_diff_check_return(self, mock_isfile): + mock_isfile.return_value = False + rc, out = utils.diff_check("/opt/file1", "/opt/fil2") + self.assertEqual(rc, 1) + self.assertEqual(out, "/opt/file1 does not exist\n") + + @mock.patch('crmsh.report.utils.cib_diff') + @mock.patch('os.path.basename') + @mock.patch('os.path.isfile') + def test_diff_check(self, mock_isfile, mock_basename, mock_cib_diff): + mock_isfile.side_effect = [True, True] + mock_basename.return_value = "cib.xml" + mock_cib_diff.return_value = (0, "") + rc, out = utils.diff_check("/opt/node1/cib.xml", "/opt/node2/cib.xml") + self.assertEqual(rc, 0) + self.assertEqual(out, "") + + 
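+    # Note: judging from the mocks above, diff_check() routes cib.xml pairs
+    # to cib_diff() (which shells out to crm_diff) and other files to a plain
+    # text diff; txt_diff() and cib_diff() are exercised directly below.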
@mock.patch('crmsh.report.utils.ShellUtils') + def test_txt_diff(self, mock_run): + mock_run_inst = mock.Mock() + mock_run.return_value = mock_run_inst + mock_run_inst.get_stdout_stderr.return_value = (0, "", None) + rc, out = utils.txt_diff("txt1", "txt2") + self.assertEqual(rc, 0) + self.assertEqual(out, "") + + @mock.patch('os.path.isfile') + def test_cib_diff_not_running(self, mock_isfile): + mock_isfile.side_effect = [True, False, False, True] + rc, out = utils.cib_diff("/opt/node1/cib.xml", "/opt/node2/cib.xml") + self.assertEqual(rc, 1) + self.assertEqual(out, "Can't compare cibs from running and stopped systems\n") + + @mock.patch('crmsh.report.utils.ShellUtils') + @mock.patch('os.path.isfile') + def test_cib_diff(self, mock_isfile, mock_run): + mock_run_inst = mock.Mock() + mock_run.return_value = mock_run_inst + mock_isfile.side_effect = [True, True] + mock_run_inst.get_stdout_stderr.return_value = (0, "", None) + rc, out = utils.cib_diff("/opt/node1/cib.xml", "/opt/node2/cib.xml") + self.assertEqual(rc, 0) + self.assertEqual(out, "") + mock_run_inst.get_stdout_stderr.assert_called_once_with("crm_diff -c -n /opt/node1/cib.xml -o /opt/node2/cib.xml") + + @mock.patch('os.symlink') + @mock.patch('shutil.move') + @mock.patch('os.remove') + @mock.patch('os.path.isfile') + def test_consolidate(self, mock_isfile, mock_remove, mock_move, mock_symlink): + mock_isfile.side_effect = [True, False] + mock_ctx_inst = mock.Mock(node_list=["node1", "node2"], work_dir="/opt/workdir") + utils.consolidate(mock_ctx_inst, "target_file") + mock_isfile.assert_has_calls([ + mock.call("/opt/workdir/target_file"), + mock.call("/opt/workdir/target_file") + ]) + mock_symlink.assert_has_calls([ + mock.call('../target_file', '/opt/workdir/node1/target_file'), + mock.call('../target_file', '/opt/workdir/node2/target_file') + ]) + + @mock.patch('crmsh.report.utils.Sanitizer') + def test_do_sanitize(self, mock_sanitizer): + mock_inst = mock.Mock() + mock_sanitizer.return_value = mock_inst + mock_ctx_inst = mock.Mock() + utils.do_sanitize(mock_ctx_inst) + mock_inst.prepare.assert_called_once_with() + mock_inst.sanitize.assert_called_once_with() + + @mock.patch('crmsh.utils.read_from_file') + def test_print_logseg_empty(self, mock_read): + mock_read.return_value = "" + res = utils.print_logseg("log1", 1234, 0) + self.assertEqual(res, "") + + @mock.patch('crmsh.report.utils.findln_by_timestamp') + @mock.patch('crmsh.utils.read_from_file') + def test_print_logseg_none(self, mock_read, mock_findln): + mock_read.return_value = "data" + mock_findln.return_value = None + res = utils.print_logseg("log1", 1234, 0) + self.assertEqual(res, "") + + @mock.patch('crmsh.report.utils.filter_lines') + @mock.patch('crmsh.report.utils.logger.debug2') + @mock.patch('crmsh.report.utils.findln_by_timestamp') + @mock.patch('crmsh.utils.read_from_file') + def test_print_logseg(self, mock_read, mock_findln, mock_debug, mock_filter): + mock_read.return_value = "line1\nline2\nline3" + mock_filter.return_value = "line1\nline2\nline3" + res = utils.print_logseg("log1", 0, 0) + self.assertEqual(res, mock_filter.return_value) + mock_debug.assert_called_once_with("Including segment [%d-%d] from %s", 1, 3, "log1") + + def test_head(self): + data = "line1\nline2\nline3" + res = utils.head(2, data) + self.assertEqual(res, ["line1", "line2"]) + + def test_tail(self): + data = "line1\nline2\nline3" + res = utils.tail(2, data) + self.assertEqual(res, ["line2", "line3"]) + + @mock.patch('crmsh.utils.get_open_method') + @mock.patch('builtins.open', 
create=True) + def test_write_to_file(self, mock_open, mock_method): + mock_method.return_value = mock_open + file_handle = mock_open.return_value.__enter__.return_value + utils.write_to_file('Hello', 'file.txt') + mock_open.assert_called_once_with('file.txt', 'w') + file_handle.write.assert_called_once_with('Hello') + + @mock.patch('gzip.open') + @mock.patch('crmsh.utils.get_open_method') + def test_write_to_file_encode(self, mock_method, mock_open): + mock_method.return_value = mock_open + file_handle = mock_open.return_value.__enter__.return_value + utils.write_to_file('Hello', 'file.txt') + mock_open.assert_called_once_with('file.txt', 'w') + file_handle.write.assert_called_once_with(b'Hello') + + @mock.patch('crmsh.report.utils.dt_to_str') + @mock.patch('crmsh.report.utils.ts_to_dt') + def test_ts_to_str(self, mock_ts_to_dt, mock_dt_to_str): + mock_ts_to_dt.return_value = datetime.datetime(2020, 2, 19, 21, 44, 7, 977355) + mock_dt_to_str.return_value = "2020-02-19 21:44" + res = utils.ts_to_str(1693519260.0) + self.assertEqual(res, mock_dt_to_str.return_value) + + def test_ts_to_dt(self): + res = utils.ts_to_dt(crmutils.parse_to_timestamp("2023-09-01 06:01")) + self.assertEqual(utils.dt_to_str(res), "2023-09-01 06:01:00") + + def test_now(self): + expected_res = datetime.datetime.now().strftime(constants.TIME_FORMAT) + res = utils.now() + self.assertEqual(res, expected_res) + + @mock.patch('crmsh.utils.str2file') + @mock.patch('crmsh.utils.read_from_file') + @mock.patch('os.path.isfile') + @mock.patch('crmsh.report.utils.now') + def test_create_description_template(self, mock_now, mock_isfile, mock_read, mock_str2file): + mock_now.return_value = "2023-09-01 06:01" + sys.argv = ["crm", "report", "option1"] + mock_ctx_inst = mock.Mock(node_list=["node1"], work_dir="/opt/workdir") + mock_isfile.return_value = True + mock_read.return_value = "data" + utils.create_description_template(mock_ctx_inst) + + @mock.patch('crmsh.utils.str2file') + @mock.patch('crmsh.report.utils.extract_critical_log') + @mock.patch('crmsh.report.utils.check_collected_files') + @mock.patch('crmsh.report.utils.compare_and_consolidate_files') + @mock.patch('glob.glob') + def test_analyze(self, mock_glob, mock_compare, mock_check_collected, mock_extract, mock_str2file): + mock_compare.return_value = "data" + mock_check_collected.return_value = "" + mock_extract.return_value = "" + mock_ctx_inst = mock.Mock(work_dir="/opt/work_dir") + utils.analyze(mock_ctx_inst) + mock_str2file.assert_called_once_with("data", f"/opt/work_dir/{constants.ANALYSIS_F}") + + @mock.patch('crmsh.report.utils.consolidate') + @mock.patch('crmsh.report.utils.do_compare') + @mock.patch('glob.glob') + def test_compare_and_consolidate_files(self, mock_glob, mock_compare, mock_consolidate): + mock_ctx_inst = mock.Mock(work_dir="/opt/work_dir") + mock_glob.side_effect = [False, True, True, True, True] + mock_compare.side_effect = [(0, ""), (0, ""), (0, ""), (0, "")] + res = utils.compare_and_consolidate_files(mock_ctx_inst) + self.assertEqual(f"Diff {constants.MEMBERSHIP_F}... no {constants.MEMBERSHIP_F} found in /opt/work_dir\nDiff {constants.CRM_MON_F}... OK\nDiff {constants.COROSYNC_F}... OK\nDiff {constants.SYSINFO_F}... OK\nDiff {constants.CIB_F}... 
OK\n\n", res) + + @mock.patch('crmsh.utils.read_from_file') + @mock.patch('crmsh.utils.file_is_empty') + @mock.patch('os.path.isfile') + def test_check_collected_files(self, mock_isfile, mock_is_empty, mock_read): + mock_ctx_inst = mock.Mock(node_list=["node1"], work_dir="/opt/work_dir") + mock_isfile.side_effect = [False, False, True] + mock_is_empty.return_value = False + mock_read.return_value = "data" + res = utils.check_collected_files(mock_ctx_inst) + self.assertEqual(res, ["Checking problems with permissions/ownership at node1:", "data"]) + + @mock.patch('logging.Logger.error') + @mock.patch('crmsh.utils.parse_to_timestamp') + def test_parse_to_timestamp_none(self, mock_parse, mock_error): + mock_parse.return_value = None + with self.assertRaises(utils.ReportGenericError) as err: + utils.parse_to_timestamp("xxxxx") + mock_error.assert_has_calls([ + mock.call(f"Invalid time string 'xxxxx'"), + mock.call('Try these formats like: 2pm; "2019/9/5 12:30"; "09-Sep-07 2:00"; "[1-9][0-9]*[YmdHM]"') + ]) + + @mock.patch('logging.Logger.error') + @mock.patch('crmsh.utils.parse_to_timestamp') + def test_parse_to_timestamp(self, mock_parse, mock_error): + mock_parse.return_value = 1234567 + res = utils.parse_to_timestamp("2023") + self.assertEqual(res, mock_parse.return_value) + + def test_parse_to_timestamp_delta(self): + timedelta = datetime.timedelta(days=10) + expected_timestamp = (datetime.datetime.now() - timedelta).timestamp() + res = utils.parse_to_timestamp("10d") + self.assertEqual(int(res), int(expected_timestamp)) + + @mock.patch('crmsh.utils.read_from_file') + @mock.patch('glob.glob') + def test_extract_critical_log(self, mock_glob, mock_read): + mock_glob.return_value = ["/opt/workdir/pacemaker.log"] + mock_read.return_value = """ +line1 +pacemaker-controld[5678]: warning: data +pacemaker-schedulerd[5677]: error: Resource +line4 + """ + mock_ctx_inst = mock.Mock(work_dir="/opt/workdir") + res = utils.extract_critical_log(mock_ctx_inst) + expected_data = """ +WARNINGS or ERRORS in pacemaker.log: +pacemaker-controld[5678]: warning: data +pacemaker-schedulerd[5677]: error: Resource""" + self.assertEqual('\n'.join(res), expected_data) + + def test_findln_by_timestamp_1(self): + data = """Apr 03 11:03:31 15sp1-1 pacemaker-fenced [1944] (pcmk_cpg_membership) info: Group event stonith-ng.3: node 2 joined +Apr 03 11:03:41 15sp1-1 pacemaker-fenced [1944] (pcmk_cpg_membership) info: Group event stonith-ng.3: node 1 (15sp1-1) is member +Apr 03 11:03:51 15sp1-1 pacemaker-fenced [1944] (corosync_node_name) info: Unable to get node name for nodeid 2""" + data_list = data.split('\n') + constants.STAMP_TYPE = utils.determin_log_format(data) + first_timestamp = utils.get_timestamp(data_list[0]) + middle_timestamp = utils.get_timestamp(data_list[1]) + last_timestamp = utils.get_timestamp(data_list[2]) + assert first_timestamp < middle_timestamp < last_timestamp + line_stamp = crmutils.parse_to_timestamp("Apr 03 11:03:41") + result_line = utils.findln_by_timestamp(data, line_stamp) + assert result_line == 2 + line_stamp = crmutils.parse_to_timestamp("Apr 03 12:03:41") + result_line = utils.findln_by_timestamp(data, line_stamp) + assert result_line == 3 + + def test_findln_by_timestamp_irregular(self): + data = """line1 + line2 + line3 + line4""" + target_time = "Apr 03 13:10" + target_time_stamp = crmutils.parse_to_timestamp(target_time) + result_line = utils.findln_by_timestamp(data, target_time_stamp) + self.assertIsNone(result_line) + + def test_findln_by_timestamp(self): + target_time = "Apr 03 
13:10" + target_time_stamp = crmutils.parse_to_timestamp(target_time) + with open('pacemaker.log') as f: + data = f.read() + constants.STAMP_TYPE = utils.determin_log_format(data) + result_line = utils.findln_by_timestamp(data, target_time_stamp) + result_line_stamp = utils.get_timestamp(data.split('\n')[result_line-1]) + assert result_line_stamp > target_time_stamp + result_pre_line_stamp = utils.get_timestamp(data.split('\n')[result_line-2]) + assert result_pre_line_stamp < target_time_stamp + + target_time = "Apr 03 11:01:19" + target_time_stamp = crmutils.parse_to_timestamp(target_time) + result_line = utils.findln_by_timestamp(data, target_time_stamp) + result_time = ' '.join(data.split('\n')[result_line-1].split()[:3]) + self.assertEqual(result_time, target_time) diff --git a/test/unittests/test_utils.py b/test/unittests/test_utils.py index afe7f124dc..b3cadf1f36 100644 --- a/test/unittests/test_utils.py +++ b/test/unittests/test_utils.py @@ -72,31 +72,6 @@ def test_check_file_content_included(mock_detect, mock_run): ]) -@mock.patch("crmsh.sh.ShellUtils.get_stdout_stderr") -def test_get_iplist_corosync_using_exception(mock_run): - mock_run.return_value = (1, None, "error of cfgtool") - with pytest.raises(ValueError) as err: - utils.get_iplist_corosync_using() - assert str(err.value) == "error of cfgtool" - mock_run.assert_called_once_with("corosync-cfgtool -s") - - -@mock.patch("crmsh.sh.ShellUtils.get_stdout_stderr") -def test_get_iplist_corosync_using(mock_run): - output = """ -RING ID 0 - id = 192.168.122.193 - status = ring 0 active with no faults -RING ID 1 - id = 10.10.10.121 - status = ring 1 active with no faults -""" - mock_run.return_value = (0, output, None) - res = utils.get_iplist_corosync_using() - assert res == ["192.168.122.193", "10.10.10.121"] - mock_run.assert_called_once_with("corosync-cfgtool -s") - - @mock.patch('re.search') @mock.patch('crmsh.sh.ShellUtils.get_stdout') def test_get_nodeid_from_name_run_None1(mock_get_stdout, mock_re_search): @@ -141,36 +116,10 @@ def test_check_ssh_passwd_need(mock_run): assert res is True mock_run.assert_called_once_with( "bob", - "ssh -o StrictHostKeyChecking=no -o EscapeChar=none -o ConnectTimeout=15 -T -o Batchmode=yes alice@node1 true", + " ssh -o StrictHostKeyChecking=no -o EscapeChar=none -o ConnectTimeout=15 -T -o Batchmode=yes alice@node1 true", ) -@mock.patch('logging.Logger.debug') -@mock.patch('crmsh.sh.ShellUtils.get_stdout_stderr') -def test_get_member_iplist_None(mock_get_stdout_stderr, mock_common_debug): - mock_get_stdout_stderr.return_value = (1, None, "Failed to initialize the cmap API. Error CS_ERR_LIBRARY") - assert utils.get_member_iplist() is None - mock_get_stdout_stderr.assert_called_once_with('corosync-cmapctl -b runtime.totem.pg.mrp.srp.members') - mock_common_debug.assert_called_once_with('Failed to initialize the cmap API. 
Error CS_ERR_LIBRARY') - - -def test_get_member_iplist(): - with mock.patch('crmsh.sh.ShellUtils.get_stdout_stderr') as mock_get_stdout_stderr: - cmap_value = ''' -runtime.totem.pg.mrp.srp.members.336860211.config_version (u64) = 0 -runtime.totem.pg.mrp.srp.members.336860211.ip (str) = r(0) ip(20.20.20.51) -runtime.totem.pg.mrp.srp.members.336860211.join_count (u32) = 1 -runtime.totem.pg.mrp.srp.members.336860211.status (str) = joined -runtime.totem.pg.mrp.srp.members.336860212.config_version (u64) = 0 -runtime.totem.pg.mrp.srp.members.336860212.ip (str) = r(0) ip(20.20.20.52) -runtime.totem.pg.mrp.srp.members.336860212.join_count (u32) = 1 -runtime.totem.pg.mrp.srp.members.336860212.status (str) = joined - ''' - mock_get_stdout_stderr.return_value = (0, cmap_value, None) - assert utils.get_member_iplist() == ['20.20.20.51', '20.20.20.52'] - mock_get_stdout_stderr.assert_called_once_with('corosync-cmapctl -b runtime.totem.pg.mrp.srp.members') - - @mock.patch('crmsh.utils.list_cluster_nodes') def test_cluster_run_cmd_exception(mock_list_nodes): mock_list_nodes.return_value = None @@ -273,9 +222,6 @@ def test_str2tmp(): assert os.path.isfile(filename) assert open(filename).read() == txt + "\n" assert utils.file2str(filename) == txt - # TODO: should this really return - # an empty line at the end? - assert utils.file2list(filename) == [txt, ''] os.unlink(filename) @@ -689,19 +635,6 @@ def test_is_ipv6(self, mock_version): self.assertEqual(res, False) mock_version.assert_called_once_with() - @mock.patch('crmsh.utils.IP.ip_address', new_callable=mock.PropertyMock) - def test_is_valid_ip_exception(self, mock_ip_address): - mock_ip_address.side_effect = ValueError - res = utils.IP.is_valid_ip("xxxx") - self.assertEqual(res, False) - mock_ip_address.assert_called_once_with() - - @mock.patch('crmsh.utils.IP.ip_address', new_callable=mock.PropertyMock) - def test_is_valid_ip(self, mock_ip_address): - res = utils.IP.is_valid_ip("10.10.10.1") - self.assertEqual(res, True) - mock_ip_address.assert_called_once_with() - @mock.patch('crmsh.utils.IP.ip_address', new_callable=mock.PropertyMock) def test_is_loopback(self, mock_ip_address): mock_ip_address_inst = mock.Mock(is_loopback=False) @@ -710,14 +643,6 @@ def test_is_loopback(self, mock_ip_address): self.assertEqual(res, mock_ip_address_inst.is_loopback) mock_ip_address.assert_called_once_with() - @mock.patch('crmsh.utils.IP.ip_address', new_callable=mock.PropertyMock) - def test_is_link_local(self, mock_ip_address): - mock_ip_address_inst = mock.Mock(is_link_local=False) - mock_ip_address.return_value = mock_ip_address_inst - res = self.ip_inst.is_link_local - self.assertEqual(res, mock_ip_address_inst.is_link_local) - mock_ip_address.assert_called_once_with() - class TestInterface(unittest.TestCase): """ @@ -764,18 +689,6 @@ def test_network(self, mock_ip_interface): assert self.interface.network == "10.10.10.0" mock_ip_interface.assert_called_once_with() - @mock.patch('crmsh.utils.Interface.ip_interface', new_callable=mock.PropertyMock) - @mock.patch('crmsh.utils.IP') - def test_ip_in_network(self, mock_ip, mock_ip_interface): - mock_ip_inst = mock.Mock(ip_address="10.10.10.123") - mock_ip.return_value = mock_ip_inst - mock_ip_interface_inst = mock.Mock(network=["10.10.10.123"]) - mock_ip_interface.return_value = mock_ip_interface_inst - res = self.interface.ip_in_network("10.10.10.123") - assert res is True - mock_ip.assert_called_once_with("10.10.10.123") - mock_ip_interface.assert_called_once_with() - class TestInterfacesInfo(unittest.TestCase): 
""" @@ -950,48 +863,6 @@ def test_get_default_ip_list(self, mock_nic_list, mock_first_ip): mock_nic_list.assert_has_calls([mock.call(), mock.call(), mock.call()]) mock_first_ip.assert_has_calls([mock.call("eth1"), mock.call("eth0")]) - @mock.patch('crmsh.utils.Interface') - @mock.patch('crmsh.utils.InterfacesInfo.interface_list', new_callable=mock.PropertyMock) - @mock.patch('crmsh.utils.InterfacesInfo.get_interfaces_info') - @mock.patch('crmsh.utils.IP.is_ipv6') - def test_ip_in_network(self, mock_is_ipv6, mock_get_interfaces_info, mock_interface_list, mock_interface): - mock_is_ipv6.return_value = False - mock_interface_inst_1 = mock.Mock() - mock_interface_inst_2 = mock.Mock() - mock_interface_inst_1.ip_in_network.return_value = False - mock_interface_inst_2.ip_in_network.return_value = True - mock_interface_list.return_value = [mock_interface_inst_1, mock_interface_inst_2] - - res = utils.InterfacesInfo.ip_in_network("10.10.10.1") - assert res is True - - mock_is_ipv6.assert_called_once_with("10.10.10.1") - mock_get_interfaces_info.assert_called_once_with() - mock_interface_list.assert_called_once_with() - mock_interface_inst_1.ip_in_network.assert_called_once_with("10.10.10.1") - mock_interface_inst_2.ip_in_network.assert_called_once_with("10.10.10.1") - - @mock.patch('crmsh.utils.Interface') - @mock.patch('crmsh.utils.InterfacesInfo.interface_list', new_callable=mock.PropertyMock) - @mock.patch('crmsh.utils.InterfacesInfo.get_interfaces_info') - @mock.patch('crmsh.utils.IP.is_ipv6') - def test_ip_in_network_false(self, mock_is_ipv6, mock_get_interfaces_info, mock_interface_list, mock_interface): - mock_is_ipv6.return_value = False - mock_interface_inst_1 = mock.Mock() - mock_interface_inst_2 = mock.Mock() - mock_interface_inst_1.ip_in_network.return_value = False - mock_interface_inst_2.ip_in_network.return_value = False - mock_interface_list.return_value = [mock_interface_inst_1, mock_interface_inst_2] - - res = utils.InterfacesInfo.ip_in_network("10.10.10.1") - assert res is False - - mock_is_ipv6.assert_called_once_with("10.10.10.1") - mock_get_interfaces_info.assert_called_once_with() - mock_interface_list.assert_called_once_with() - mock_interface_inst_1.ip_in_network.assert_called_once_with("10.10.10.1") - mock_interface_inst_2.ip_in_network.assert_called_once_with("10.10.10.1") - @mock.patch("crmsh.utils.get_nodeid_from_name") def test_get_iplist_from_name_no_nodeid(mock_get_nodeid):