From ddfab60e1593eeb75546b53ac617d7c89fd10c15 Mon Sep 17 00:00:00 2001 From: Xiangce Liu Date: Tue, 17 Dec 2024 20:40:35 +0800 Subject: [PATCH] feat: add a new base parser LazyLogFileOutput - Doesn't like the LogFileOutput, the LazyLogFileOutput doesn't load the content during initialization, its content can only be loaded via calling the `do_scan` method explicitly. - It's useful for the cases which need to load thousands of files belongs to one single Spec in one pass of running. - For implementation's convenience, converted the "scanners" from `list` `dict` in which the `result_key` is key and `scanner` is value. And removed the `scanner_keys`. - Update the LsPci accordingly. - see RHINENG-13810 Signed-off-by: Xiangce Liu --- insights/core/__init__.py | 150 +++++++--- insights/parsers/lspci.py | 14 +- insights/tests/test_lazylogfileoutput.py | 362 +++++++++++++++++++++++ 3 files changed, 477 insertions(+), 49 deletions(-) create mode 100644 insights/tests/test_lazylogfileoutput.py diff --git a/insights/core/__init__.py b/insights/core/__init__.py index e87e330e08..959f75a202 100644 --- a/insights/core/__init__.py +++ b/insights/core/__init__.py @@ -77,7 +77,9 @@ class Parser(object): """ def __init__(self, context): - self.file_path = os.path.join("/", context.relative_path) if context.relative_path is not None else None + self.file_path = ( + os.path.join("/", context.relative_path) if context.relative_path is not None else None + ) """str: Full context path of the input file.""" self.file_name = os.path.basename(context.path) if context.path is not None else None """str: Filename portion of the input file.""" @@ -150,6 +152,7 @@ def inner(children, stack): if c.children: c.children = inner(c.children, stack) return results + return inner(docs, []) @@ -340,6 +343,7 @@ class ConfigParser(Parser, ConfigComponent): Raises: SkipComponent: When input content is empty. """ + def parse_content(self, content): if not content: raise SkipComponent('Empty content.') @@ -359,6 +363,7 @@ class ConfigCombiner(ConfigComponent): include directives for supplementary configuration files. httpd and nginx are examples. """ + def __init__(self, confs, main_file, include_finder): self.confs = confs self.main = self.find_main(main_file) @@ -396,6 +401,7 @@ class ContainerConfigCombiner(ConfigCombiner): files with include directives for supplementary configuration files. httpd and nginx are examples. """ + def __init__(self, confs, main_file, include_finder, engine, image, container_id): self.image = image """str: The image of the container.""" @@ -516,8 +522,7 @@ def parse_content(self, content): # Either only one thing or line or rest starts with comment # but either way we need to have an equals in the first word. - if (len(words) == 1 or (len(words) > 1 and words[1][0] == '#')) \ - and '=' in words[0]: + if (len(words) == 1 or (len(words) > 1 and words[1][0] == '#')) and '=' in words[0]: key, value = words[0].split('=', 1) result[key] = value # Only store lines if they aren't comments or blank @@ -527,7 +532,7 @@ def parse_content(self, content): self.unparsed_lines = unparsed_lines def keys(self): - """ Return the list of keys (in no order) in the underlying dictionary.""" + """Return the list of keys (in no order) in the underlying dictionary.""" return self.data.keys() @@ -544,11 +549,11 @@ class CommandParser(Parser): """ __bad_single_lines = [ - "no such file or directory", - "not a directory", - "command not found", - "no module named", - "no files found for", + "no such file or directory", + "not a directory", + "command not found", + "no module named", + "no files found for", ] """ This variable contains filters for bad responses of the single line @@ -556,7 +561,7 @@ class CommandParser(Parser): When adding a new line to the list make sure text is all lower case. """ __bad_lines = [ - "missing dependencies:", + "missing dependencies:", ] """ This variable contains filters for bad responses of the lines @@ -606,7 +611,9 @@ def __init__(self, context, extra_bad_lines=None): `self.__bad_single_lines` and `self.__bad_lines`. """ extra_bad_lines = [] if extra_bad_lines is None else extra_bad_lines - valid_lines = self.validate_lines(context.content, self.__bad_single_lines, self.__bad_lines) + valid_lines = self.validate_lines( + context.content, self.__bad_single_lines, self.__bad_lines + ) if valid_lines and extra_bad_lines: valid_lines = self.validate_lines(context.content, extra_bad_lines, extra_bad_lines) if not valid_lines: @@ -621,6 +628,7 @@ class ContainerParser(CommandParser): A class specifically for container parser, with the "image" name, the engine provider and the container ID on the basis of ``Parser``. """ + def __init__(self, context): self.image = context.image """str: The image of the container.""" @@ -702,7 +710,11 @@ def parse_content(self, content): if len(content) > 3: self.raw = '\n'.join(content) self.dom = ET.fromstring(self.raw) - self.xmlns = self.dom.tag.strip("{").split("}")[0] if all(c in self.dom.tag for c in ["{", "}"]) else "" + self.xmlns = ( + self.dom.tag.strip("{").split("}")[0] + if all(c in self.dom.tag for c in ["{", "}"]) + else "" + ) self.data = self.parse_dom() def get_elements(self, element, xmlns=None): @@ -754,6 +766,7 @@ class YAMLParser(Parser, LegacyItemAccess): """ A parser class that reads YAML files. Base your own parser on this. """ + def parse_content(self, content): try: if type(content) is list: @@ -779,6 +792,7 @@ class JSONParser(Parser, LegacyItemAccess): """ A parser class that reads JSON files. Base your own parser on this. """ + def parse_content(self, content): try: if isinstance(content, list): @@ -803,8 +817,7 @@ def parse_content(self, content): class ScanMeta(type): def __new__(cls, name, parents, dct): - dct["scanners"] = [] - dct["scanner_keys"] = set() + dct["scanners"] = dict() return super(ScanMeta, cls).__new__(cls, name, parents, dct) @@ -866,6 +879,7 @@ def has_fcoe_edd(line): strings or False). """ + def __init__(self, *args, **kwargs): deprecated(Scannable, "Please use the :class:`insights.core.Parser` instead.", "3.3.0") super(Scannable, self).__init__(*args, **kwargs) @@ -879,11 +893,10 @@ def _scan(cls, result_key, scanner): retrieved later by a rule. """ - if result_key in cls.scanner_keys: + if result_key in cls.scanners: raise ValueError("'%s' is already a registered scanner key" % result_key) - cls.scanners.append(scanner) - cls.scanner_keys.add(result_key) + cls.scanners.update({result_key: scanner}) @classmethod def any(cls, result_key, func): @@ -891,6 +904,7 @@ def any(cls, result_key, func): Sets the `result_key` to the output of `func` if `func` ever returns truthy """ + def scanner(self, obj): current_value = getattr(self, result_key, None) setattr(self, result_key, current_value or func(obj)) @@ -903,6 +917,7 @@ def collect(cls, result_key, func): Sets the `result_key` to an iterable of objects for which `func(obj)` returns True """ + def scanner(self, obj): if not getattr(self, result_key, None): setattr(self, result_key, []) @@ -922,13 +937,13 @@ def parse(self, content): def parse_content(self, content): for obj in self.parse(content): - for scanner in self.scanners: + for scanner in self.scanners.values(): scanner(self, obj) class TextFileOutput(six.with_metaclass(ScanMeta, Parser)): """ - Class for parsing gerenal text file content. + Class for parsing general text file content. File content is stored in raw format in the ``lines`` attribute. @@ -983,13 +998,14 @@ class TextFileOutput(six.with_metaclass(ScanMeta, Parser)): True """ + def parse_content(self, content): """ Use all the defined scanners to search the log file, setting the properties defined in the scanner. """ self.lines = content - for scanner in self.scanners: + for scanner in self.scanners.values(): scanner(self) def __contains__(self, s): @@ -1018,8 +1034,7 @@ def _valid_search(self, s, check=all): """ if isinstance(s, six.string_types): return lambda l: s in l - elif (isinstance(s, list) and len(s) > 0 and - all(isinstance(w, six.string_types) for w in s)): + elif isinstance(s, list) and len(s) > 0 and all(isinstance(w, six.string_types) for w in s): return lambda l: check(w in l for w in s) elif s is not None: raise TypeError('Search items must be given as a string or a list of strings') @@ -1065,15 +1080,14 @@ def scan(cls, result_key, func): ValueError: When `result_key` is already a registered scanner key. """ - if result_key in cls.scanner_keys: + if result_key in cls.scanners: raise ValueError("'%s' is already a registered scanner key" % result_key) def scanner(self): result = func(self) setattr(self, result_key, result) - cls.scanners.append(scanner) - cls.scanner_keys.add(result_key) + cls.scanners.update({result_key: scanner}) @classmethod def token_scan(cls, result_key, token, check=all): @@ -1090,6 +1104,7 @@ def token_scan(cls, result_key, token, check=all): (bool): the property will contain True if a line contained (any or all) of the tokens given. """ + def _scan(self): search_by_expression = self._valid_search(token, check) return any(search_by_expression(l) for l in self.lines) @@ -1112,6 +1127,7 @@ def keep_scan(cls, result_key, token, check=all, num=None, reverse=False): Returns: (list): list of dictionaries corresponding to the parsed lines contain the `token`. """ + def _scan(self): return self.get(token, check=check, num=num, reverse=reverse) @@ -1131,6 +1147,7 @@ def last_scan(cls, result_key, token, check=all): Returns: (dict): dictionary corresponding to the last parsed line contains the `token`. """ + def _scan(self): ret = self.get(token, check=check, num=1, reverse=True) return ret[0] if ret else dict() @@ -1247,12 +1264,15 @@ def get_after(self, timestamp, s=None): # character sets. Note that we don't include time zone or other # outputs (e.g. day-of-year) that don't usually occur in time stamps. format_conversion_for = { - 'a': r'\w{3}', 'A': r'\w+', # Week day name + 'a': r'\w{3}', + 'A': r'\w+', # Week day name 'w': r'[0123456]', # Week day number 'd': r'([0 ][123456789]|[12]\d|3[01])', # Day of month - 'b': r'\w{3}', 'B': r'\w+', # Month name + 'b': r'\w{3}', + 'B': r'\w+', # Month name 'm': r'([0 ]\d|1[012])', # Month number - 'y': r'\d{2}', 'Y': r'\d{4}', # Year + 'y': r'\d{2}', + 'Y': r'\d{4}', # Year 'H': r'([01 ]\d|2[0123])', # Hour - 24 hour format 'I': r'([0 ]?\d|1[012])', # Hour - 12 hour format 'p': r'\w{2}', # AM / PM @@ -1269,9 +1289,7 @@ def replacer(match): return format_conversion_for[match.group(1)] else: raise ParseException( - "get_after does not understand strptime format '{c}'".format( - c=match.group(0) - ) + "get_after does not understand strptime format '{c}'".format(c=match.group(0)) ) # Please do not attempt to be tricky and put a regular expression @@ -1284,18 +1302,19 @@ def replacer(match): if isinstance(time_format, dict): time_format = list(time_format.values()) if isinstance(time_format, six.string_types): - logs_have_year = ('%Y' in time_format or '%y' in time_format) + logs_have_year = '%Y' in time_format or '%y' in time_format time_re = re.compile('(' + timefmt_re.sub(replacer, time_format) + ')') # Curry strptime with time_format string. def test_parser(logstamp): return datetime.datetime.strptime(logstamp, time_format) + parse_fn = test_parser elif isinstance(time_format, list): logs_have_year = all('%Y' in tf or '%y' in tf for tf in time_format) - time_re = re.compile('(' + '|'.join( - timefmt_re.sub(replacer, tf) for tf in time_format - ) + ')') + time_re = re.compile( + '(' + '|'.join(timefmt_re.sub(replacer, tf) for tf in time_format) + ')' + ) def test_all_parsers(logstamp): # One of these must match, because the regex has selected only @@ -1306,12 +1325,11 @@ def test_all_parsers(logstamp): except ValueError: pass return ts + parse_fn = test_all_parsers else: raise ParseException( - "get_after does not recognise time formats of type {t}".format( - t=type(time_format) - ) + "get_after does not recognise time formats of type {t}".format(t=type(time_format)) ) # Most logs will appear in string format, but some logs (e.g. @@ -1358,7 +1376,46 @@ def test_all_parsers(logstamp): yield self._parse_line(line) -class Syslog(LogFileOutput): +class LazyLogFileOutput(LogFileOutput): + """ + Class for parsing log file content. Doesn't like the LogFileOutput, + the LazyLogFileOutput doesn't load the content during initialization, + its content can only be loaded via calling the `do_scan` or `lines` + explicitly. It's useful for the cases which need to load thousands of + files belongs to one single Spec in one pass of running. + Other than the lazy content loading, it's the same as the LogFileOutput. + """ + + def _handle_content(self, context): + self._lines = None + self._context = context + self._scanned = set() + + def do_scan(self, result_key=None): + """ + Do the actual scanning operations as per the specified `result_key`. + When `result_key` is not specified, all registered scanners will be + executed. Each registered scanner can only be executed once. + """ + if result_key: + if result_key not in self._scanned and result_key in self.scanners: + self.scanners[result_key](self) + self._scanned.add(result_key) + else: + for key, scanner in self.scanners.items(): + if key not in self._scanned: + self._scanned.add(key) + scanner(self) + + @property + def lines(self): + if self._lines is None: + # one-shot load all content lines here + self._lines = self._context.content + return self._lines + + +class Syslog(LazyLogFileOutput): """Class for parsing syslog file content. The important function is :func:`get(s)`, which finds all lines with the @@ -1405,6 +1462,7 @@ class Syslog(LogFileOutput): the year of the logs will be inferred from the year in your timestamp. This will also work around December/January crossovers. """ + time_format = '%b %d %H:%M:%S' def _parse_line(self, line): @@ -1501,8 +1559,11 @@ class IniConfigFile(ConfigParser): >>> my_config.has_option('logging', 'log') True """ + def parse_doc(self, content): - return iniparser.parse_doc("\n".join(content), self, return_defaults=True, return_booleans=False) + return iniparser.parse_doc( + "\n".join(content), self, return_defaults=True, return_booleans=False + ) def parse_content(self, content, allow_no_value=False): super(IniConfigFile, self).parse_content(content) @@ -1584,7 +1645,7 @@ def getboolean(self, section, option): 'true': True, 'false': False, 'on': True, - 'off': False + 'off': False, } if val.lower() not in boolean_states: @@ -1658,7 +1719,8 @@ def __contains__(self, section): def __repr__(self): return "INI file '{filename}' - sections:{sections}".format( - filename=self.file_name, sections=self.sections()) + filename=self.file_name, sections=self.sections() + ) class FileListing(Parser): @@ -1743,7 +1805,9 @@ def __init__(self, context): # the directory name in the output). Obviously if we don't have the # '-R' flag we should grab this but it's probably not worth parsing # the flags to ls for this. - deprecated(FileListing, "Please use the :class:`insights.parsers.ls.FileListing instead.", "3.5.0") + deprecated( + FileListing, "Please use the :class:`insights.parsers.ls.FileListing instead.", "3.5.0" + ) self.first_path = None path_re = re.compile(r'ls_-\w+(?P.*)$') match = path_re.search(context.path) diff --git a/insights/parsers/lspci.py b/insights/parsers/lspci.py index dff28da6c4..3753dba1ff 100644 --- a/insights/parsers/lspci.py +++ b/insights/parsers/lspci.py @@ -12,6 +12,7 @@ ------------------------------------- """ + import re from insights.core import CommandParser, TextFileOutput @@ -73,11 +74,13 @@ class LsPci(CommandParser, TextFileOutput): values of `self.data` """ + def parse_content(self, content): # Use all the defined scanners to search the log file, setting the # properties defined in the scanner. - self.lines = [l for l in content if len(l) > 0 and l[0].isdigit()] - for scanner in self.scanners: + content = get_active_lines(content) + self.lines = [l for l in content if l and l[0].isdigit()] + for scanner in self.scanners.values(): scanner(self) # Parse kernel driver lines self.data = {} @@ -86,15 +89,13 @@ def parse_content(self, content): fields = ["Subsystem", "Kernel driver in use", "Kernel modules"] - for line in get_active_lines(content): + for line in content: parts = line.split() if slot_re.match(parts[0]): slot = parts[0] device_details = line.split(None, 1)[-1] # keep the raw line - self.data[slot] = { - 'Slot': slot, - 'Dev_Details': device_details.lstrip()} + self.data[slot] = {'Slot': slot, 'Dev_Details': device_details.lstrip()} elif slot and (line.split(":")[0].strip() in fields): parts = line.split(':') self.data[slot][parts[0]] = parts[1].lstrip() @@ -180,6 +181,7 @@ class LsPciVmmkn(CommandParser, list): Attributes: """ + def parse_content(self, content): # Remove the white-trailing of the output while content and not content[-1].strip(): diff --git a/insights/tests/test_lazylogfileoutput.py b/insights/tests/test_lazylogfileoutput.py new file mode 100644 index 0000000000..7508d52b5e --- /dev/null +++ b/insights/tests/test_lazylogfileoutput.py @@ -0,0 +1,362 @@ +# -*- coding: UTF-8 -*- +import pytest + +from datetime import datetime + +from insights.core import LazyLogFileOutput +from insights.core.exceptions import ParseException +from insights.tests import context_wrap + + +class FakeMessagesClass(LazyLogFileOutput): + time_format = '%b %d %H:%M:%S' + + def bad_get_after(self, timestamp, lines=None): + self.time_format = '%q/%1 %z:%v:%j' + return super(FakeMessagesClass, self).get_after(timestamp, lines) + + def superget(self, s): + for line in self.lines: + if s in line: + parts = line.split(None, 6) + yield { + 'timestamp': ' '.join(parts[0:3]), + 'hostname': parts[3], + 'service': parts[4][:-1], # strip colon + 'message': parts[5], + } + + def listget(self, s): + # Silly function to split lines containing s. + for line in self.lines: + if s in line: + yield s.split(None, 6) + + +MESSAGES = """ +Mar 27 03:18:15 system rsyslogd: [origin software="rsyslogd" swVersion="5.8.10" x-pid="1870" x-info="http://www.rsyslog.com"] first +Mar 27 03:18:16 system rsyslogd-2177: imuxsock lost 141 messages from pid 55082 due to rate-limiting +Mar 27 03:18:19 system rsyslogd-2177: imuxsock begins to drop messages from pid 55082 due to rate-limiting +Mar 27 03:18:21 system pulp: pulp.server.db.connection:INFO: Attempting Database connection with seeds = localhost:27017 +Mar 27 03:18:21 system pulp: pulp.server.db.connection:INFO: Connection Arguments: {'max_pool_size': 10} +Mar 27 03:18:21 system pulp: pulp.server.db.connection:INFO: Database connection established with: seeds = localhost:27017, name = pulp_database +Mar 27 03:18:22 system rsyslogd-2177: imuxsock lost 145 messages from pid 55082 due to rate-limiting +Mar 27 03:18:24 system puppet-master[48226]: Setting manifest is deprecated in puppet.conf. See http://links.puppetlabs.com/env-settings-deprecations +Mar 27 03:18:24 system puppet-master[48226]: (at /usr/lib/ruby/site_ruby/1.8/puppet/settings.rb:1095:in `issue_deprecations') +Mar 27 03:18:24 system puppet-master[48226]: Setting modulepath is deprecated in puppet.conf. See http://links.puppetlabs.com/env-settings-deprecations +Mar 27 03:18:24 system puppet-master[48226]: (at /usr/lib/ruby/site_ruby/1.8/puppet/settings.rb:1095:in `issue_deprecations') +Mar 27 03:18:24 system puppet-master[48226]: Setting config_version is deprecated in puppet.conf. See http://links.puppetlabs.com/env-settings-deprecations +Mar 27 03:18:24 system puppet-master[48226]: (at /usr/lib/ruby/site_ruby/1.8/puppet/settings.rb:1095:in `issue_deprecations') +Mar 27 03:18:25 system rsyslogd-2177: imuxsock begins to drop messages from pid 55082 due to rate-limiting +Mar 27 03:39:43 system pulp: pulp.server.webservices.middleware.exception:ERROR: File "/usr/lib/python2.6/site-packages/web/application.py", line 230, in handle + return self._delegate(fn, self.fvars, args) + File "/usr/lib/python2.6/site-packages/web/application.py", line 405, in _delegate + return handle_class(f) + File "/usr/lib/python2.6/site-packages/web/application.py", line 396, in handle_class + return tocall(*args) + File "/usr/lib/python2.6/site-packages/pulp/server/webservices/controllers/decorators.py", line 227, in _auth_decorator + value = method(self, *args, **kwargs) + File "/usr/lib/python2.6/site-packages/pulp/server/webservices/controllers/consumers.py", line 503, in GET + profile = manager.get_profile(consumer_id, content_type) + File "/usr/lib/python2.6/site-packages/pulp/server/managers/consumer/profile.py", line 120, in get_profile + raise MissingResource(profile_id=profile_id) +MissingResource: Missing resource(s): profile_id={'content_type': u'rpm', 'consumer_id': u'1786cd7f-2ab2-4212-9798-c0a454e97900'} +Mar 27 03:39:43 system rsyslogd-2177: imuxsock begins to drop messages from pid 55082 due to rate-limiting +Mar 27 03:39:46 system rsyslogd-2177: imuxsock lost 165 messages from pid 55082 due to rate-limiting +Mar 27 03:39:49 system rsyslogd-2177: imuxsock begins to drop messages from pid 55082 due to rate-limiting +Mar 27 03:49:10 system pulp: pulp.server.webservices.middleware.exception:ERROR: Missing resource(s): profile_id={'content_type': u'rpm', 'consumer_id': u'79d5aed1-5631-4f40-b970-585ee974eb87'} +""" + + +def test_lazylogfileoutput_scanners(): + # Messages that are present can be kept + FakeMessagesClass.keep_scan('puppet_master_logs', ' puppet-master') + # Messages that are absent still turn up as an empty list + FakeMessagesClass.keep_scan('kernel_logs', ' kernel') + # Token scan of something that's present + FakeMessagesClass.token_scan( + 'middleware_exception_present', 'pulp.server.webservices.middleware.exception' + ) + # Token scan of something that's absent + FakeMessagesClass.token_scan('cron_present', 'CRONTAB') + + # Check that duplicate scanners raise a value exception + with pytest.raises(ValueError) as exc: + FakeMessagesClass.keep_scan('kernel_logs', ' kernel') + assert "'kernel_logs' is already a registered scanner key" in str(exc) + + def count_lost_messages(self): + imuxsock_lines = 0 + lost_messages = 0 + for line in self.lines: + if 'imuxsock lost' in line: + parts = line.split(None) + if parts[7].isdigit(): + imuxsock_lines += 1 + lost_messages += int(parts[7]) + return "lost {msgs} messages in {lines} lines".format( + msgs=lost_messages, lines=imuxsock_lines + ) + + FakeMessagesClass.scan('lost_messages', count_lost_messages) + + ctx = context_wrap(MESSAGES, path='/var/log/messages') + log = FakeMessagesClass(ctx) + assert not hasattr(log, 'puppet_master_logs') + assert not hasattr(log, 'kernel_logs') + assert not hasattr(log, 'middleware_exception_present') + assert not hasattr(log, 'cron_present') + assert not hasattr(log, 'lost_messages') + log.do_scan('puppet_master_logs') + assert hasattr(log, 'puppet_master_logs') + log.do_scan('kernel_logs') + assert hasattr(log, 'kernel_logs') + log.do_scan('middleware_exception_present') + assert hasattr(log, 'middleware_exception_present') + log.do_scan('cron_present') + assert hasattr(log, 'cron_present') + log.do_scan('lost_messages') + assert hasattr(log, 'lost_messages') + + assert len(log.puppet_master_logs) == 6 + assert ( + log.puppet_master_logs[0]['raw_message'] + == 'Mar 27 03:18:24 system puppet-master[48226]: Setting manifest is deprecated in puppet.conf. See http://links.puppetlabs.com/env-settings-deprecations' + ) + assert log.kernel_logs == [] + assert log.middleware_exception_present + assert not log.cron_present + assert log.lost_messages == 'lost 451 messages in 3 lines' + + +def test_lazylogfileoutput_scanners_list(): + # Messages that are present can be kept + FakeMessagesClass.keep_scan('puppet_master_manifest_logs', ['puppet-master', 'manifest']) + FakeMessagesClass.keep_scan('puppet_master_first', ['puppet-master', 'first']) + # Messages that are present can be kept for any of the list + FakeMessagesClass.keep_scan('puppet_master_first_any', ['puppet-master', 'first'], check=any) + FakeMessagesClass.keep_scan( + 'puppet_master_first_any_3', ['puppet-master', 'first'], num=3, check=any + ) + # Token scan of something that's absent + FakeMessagesClass.token_scan('error_missing', ['ERROR', 'Missing']) + # Token scan of something that's absent + FakeMessagesClass.token_scan('error_info', ['ERROR', 'info']) + # Token scan of something that's absent for any of the list + FakeMessagesClass.token_scan('error_info_any', ['ERROR', 'info'], check=any) + # Get the last line + FakeMessagesClass.last_scan('puppet_master_manifest_last', ['puppet-master', 'manifest']) + # Get the last line for any of the list + FakeMessagesClass.last_scan( + 'puppet_master_first_last_any', ['puppet-master', 'first'], check=any + ) + + ctx = context_wrap(MESSAGES, path='/var/log/messages') + log = FakeMessagesClass(ctx) + + assert not hasattr(log, 'puppet_master_manifest_logs') + assert not hasattr(log, 'error_missing') + assert not hasattr(log, 'puppet_master_first') + assert not hasattr(log, 'error_info') + assert not hasattr(log, 'puppet_master_first_any') + assert not hasattr(log, 'puppet_master_first_any_3') + assert not hasattr(log, 'error_info_any') + assert not hasattr(log, 'puppet_master_manifest_last') + assert not hasattr(log, 'puppet_master_first_last_any') + # scan all and load content + log.do_scan() + assert hasattr(log, 'puppet_master_manifest_logs') + assert len(log.puppet_master_manifest_logs) == 1 + assert hasattr(log, 'error_missing') + assert log.error_missing + + assert hasattr(log, 'puppet_master_first') + assert len(log.puppet_master_first) == 0 + assert hasattr(log, 'error_info') + assert log.error_info is False + + assert hasattr(log, 'puppet_master_first_any') + assert len(log.puppet_master_first_any) == 7 + assert hasattr(log, 'puppet_master_first_any_3') + assert len(log.puppet_master_first_any_3) == 3 + assert hasattr(log, 'error_info_any') + assert log.error_info_any is True + + assert hasattr(log, 'puppet_master_manifest_last') + assert 'puppet-master' in log.puppet_master_manifest_last['raw_message'] + assert 'manifest' in log.puppet_master_manifest_last['raw_message'] + assert hasattr(log, 'puppet_master_first_last_any') + assert 'puppet-master' in log.puppet_master_first_last_any['raw_message'] + + +def test_messages_get_after(): + ctx = context_wrap(MESSAGES, path='/var/log/messages') + log = FakeMessagesClass(ctx) + assert len(log.lines) == 31 + + # Get lines after date + # Remember, logs with no date are assumed to be in year we give it + assert len(list(log.get_after(datetime(2017, 3, 27, 3, 39, 46)))) == 3 + # Get subset of lines after date + pulp = log.get('pulp') # includes /pulp/ in traceback + assert len(pulp) == 8 + # Get lines after date, with one keyword + after = list(log.get_after(datetime(2017, 3, 27, 3, 20, 30), 'pulp')) + assert len(after) == 5 + # Get lines after date, with one keyword + after = list(log.get_after(datetime(2017, 3, 27, 3, 40, 30), 'pulp')) + assert len(after) == 1 + + # Get lines after date, with one keywords list + after = list(log.get_after(datetime(2017, 3, 27, 3, 20, 30), ['pulp', 'ERROR'])) + assert len(after) == 2 + after = list(log.get_after(datetime(2017, 3, 27, 3, 40, 30), 'pulp')) + assert len(after) == 1 + # No lines are found + assert list(log.get_after(datetime(2017, 3, 27, 3, 49, 46), 'pulp')) == [] + + tmp_time = datetime(2017, 3, 27, 3, 39, 46) + with pytest.raises(TypeError): + list(log.get_after(tmp_time, ['type=', False, 'AVC'])) + with pytest.raises(TypeError): + list(log.get_after(tmp_time, set(['type=', 'AVC']))) + + +def test_messages_get_after_bad_time_format(): + ctx = context_wrap(MESSAGES, path='/var/log/messages') + log = FakeMessagesClass(ctx) + assert len(log.lines) == 31 + + # The timestamp format should only contain parts that get_after recognises + with pytest.raises(ParseException) as exc: + assert list(log.bad_get_after(datetime(2017, 3, 27, 3, 39, 46))) is None + assert 'get_after does not understand strptime format ' in str(exc) + + +MESSAGES_ROLLOVER_YEAR = """ +Dec 31 21:43:00 duradm13 [CMA]: Logger failed to open catalog file +Dec 31 22:03:05 duradm13 xinetd[21465]: START: bgssd pid=28021 from=10.20.40.7 +Dec 31 22:03:05 duradm13 xinetd[21465]: EXIT: bgssd status=0 pid=28021 duration=0(sec) +Dec 31 23:03:07 duradm13 xinetd[21465]: START: bgssd pid=31307 from=10.20.40.7 +Dec 31 23:03:07 duradm13 xinetd[21465]: EXIT: bgssd status=0 pid=31307 duration=0(sec) +Dec 31 23:07:00 duradm13 [CMA]: Logger failed to open catalog file +Jan 1 00:00:00 duradm13 [CMA]: Logger failed to open catalog file +Jan 1 00:03:09 duradm13 xinetd[21465]: START: bgssd pid=2203 from=10.20.40.7 +Jan 1 00:03:09 duradm13 xinetd[21465]: EXIT: bgssd status=0 pid=2203 duration=0(sec) +Jan 1 00:11:45 duradm13 xinetd[21465]: START: vnetd pid=2670 from=10.20.40.36 +Jan 1 00:11:47 duradm13 xinetd[21465]: START: vnetd pid=2671 from=10.20.40.36 +Jan 1 00:11:48 duradm13 xinetd[21465]: EXIT: vnetd status=0 pid=2671 duration=1(sec) +Jan 1 01:00:08 duradm13 xinetd[21465]: START: nrpe pid=6189 from=10.20.40.240 +Jan 1 01:00:08 duradm13 xinetd[21465]: EXIT: nrpe status=0 pid=6189 duration=0(sec) +Jan 1 01:00:27 duradm13 xinetd[21465]: START: nrpe pid=6207 from=10.20.40.240 +Jan 1 01:00:27 duradm13 xinetd[21465]: EXIT: nrpe status=0 pid=6207 duration=0(sec) +Jan 1 01:00:29 duradm13 xinetd[21465]: START: nrpe pid=6210 from=10.20.40.240 +Jan 1 01:00:29 duradm13 xinetd[21465]: EXIT: nrpe status=0 pid=6210 duration=0(sec) +""".strip() + + +def test_messages_log_time_wrap(): + # Check rollover process for when dates in logs without a year go from + # December from January. + ctx = context_wrap(MESSAGES_ROLLOVER_YEAR, path='/var/log/messages') + log = FakeMessagesClass(ctx) + assert len(log.lines) == 18 + + # If sought date is in January, we should not get logs from December + found = list(log.get_after(datetime(2017, 1, 1, 1, 0, 0))) + assert len(found) == 6 + # If sought date is in December, we should get logs from January + found = list(log.get_after(datetime(2017, 12, 31, 23, 0, 0))) + assert len(found) == 15 + + +HTTPD_ACCESS_LOG = """ +192.168.220.42 - - [14/Feb/2016:03:18:54 -0600] "POST /XMLRPC HTTP/1.1" 200 1381 "-" "rhn.rpclib.py/$Revision$" +192.168.220.42 - - [14/Feb/2016:03:18:54 -0600] "POST /XMLRPC HTTP/1.1" 200 3282 "-" "rhn.rpclib.py/$Revision$" +192.168.220.42 - - [14/Feb/2016:03:18:54 -0600] "POST /XMLRPC HTTP/1.1" 200 163 "-" "rhn.rpclib.py/$Revision$" +192.168.220.42 - - [14/Feb/2016:03:18:54 -0600] "POST /XMLRPC HTTP/1.1" 200 138 "-" "rhn.rpclib.py/$Revision$" +192.168.128.243 - - [14/Feb/2016:03:19:00 -0600] "POST /cobbler_api HTTP/1.1" 200 144 "-" "Java/1.6.0" +192.168.128.243 - - [14/Feb/2016:03:19:00 -0600] "POST /cobbler_api HTTP/1.1" 200 129 "-" "Java/1.6.0" +192.168.128.243 - - [14/Feb/2016:03:20:00 -0600] "POST /cobbler_api HTTP/1.1" 200 144 "-" "Java/1.6.0" +192.168.128.243 - - [14/Feb/2016:03:20:00 -0600] "POST /cobbler_api HTTP/1.1" 200 129 "-" "Java/1.6.0" +""" + + +class FakeAccessLog(LazyLogFileOutput): + time_format = '%d/%b/%Y:%H:%M:%S' + + +def test_logs_with_year(): + ctx = context_wrap(HTTPD_ACCESS_LOG, path='/var/log/httpd/access_log') + log = FakeAccessLog(ctx) + assert len(log.lines) == 8 + + assert len(list(log.get_after(datetime(2016, 2, 14, 3, 18, 55)))) == 4 + + +DATE_CHANGE_MARIADB_LOG = """ +161109 9:25:42 [Warning] SSL error: SSL_CTX_set_default_verify_paths failed +161109 9:25:42 [Note] WSREP: Service disconnected. +161109 9:25:43 [Note] WSREP: Some threads may fail to exit. +161109 14:28:24 InnoDB: Initializing buffer pool, size = 128.0M +161109 14:28:24 InnoDB: Completed initialization of buffer pool +2017-01-03 09:36:17 139651251140544 [Note] MariaDB 10.1.5 started successfully +""" + + +class FakeMariaDBLog(LazyLogFileOutput): + time_format = ['%y%m%d %H:%M:%S', '%Y-%m-%d %H:%M:%S'] + + +class DictMariaDBLog(LazyLogFileOutput): + time_format = {'old': '%y%m%d %H:%M:%S', 'new': '%Y-%m-%d %H:%M:%S'} + + +class BadClassMariaDBLog(LazyLogFileOutput): + time_format = FakeAccessLog + + +def test_logs_with_two_timestamps(): + ctx = context_wrap(DATE_CHANGE_MARIADB_LOG, path='/var/log/mariadb/mariadb.log') + log = FakeMariaDBLog(ctx) + assert len(log.lines) == 6 + + new_ver = list(log.get_after(datetime(2017, 1, 1, 9, 0, 0))) + assert len(new_ver) == 1 + assert ( + new_ver[0]['raw_message'] + == '2017-01-03 09:36:17 139651251140544 [Note] MariaDB 10.1.5 started successfully' + ) + + dict_log = DictMariaDBLog(ctx) + new_ver_d = list(dict_log.get_after(datetime(2017, 1, 1, 9, 0, 0))) + assert len(new_ver_d) == 1 + assert ( + new_ver_d[0]['raw_message'] + == '2017-01-03 09:36:17 139651251140544 [Note] MariaDB 10.1.5 started successfully' + ) + + # get_after should only supply a string or list for time_format + with pytest.raises(ParseException) as exc: + logerr = BadClassMariaDBLog(ctx) + assert list(logerr.get_after(datetime(2017, 3, 27, 3, 39, 46))) is None + assert 'get_after does not recognise time formats of type ' in str(exc) + + +MILLISECONDS_TOWER_LOG = """ +2020-05-28 19:25:36,892 WARNING django.request Not Found: /api/v1/me/ +2020-05-28 19:25:46,944 INFO awx.api.authentication User admin performed a GET to /api/v2/activity_stream/ through the API +2020-05-28 19:33:03,125 INFO awx.api.authentication User admin performed a GET to /api/v2/job_templates/7/survey_spec/ through the API +2020-05-28 19:33:03,413 INFO awx.api.authentication User admin performed a GET to /api/v2/projects/ through the API +""".strip() + + +class FakeTowerLog(LazyLogFileOutput): + time_format = '%Y-%m-%d %H:%M:%S,%f' + + +def test_logs_with_milliseconds(): + ctx = context_wrap(MILLISECONDS_TOWER_LOG, path='/var/log/tower/tower.log') + log = FakeTowerLog(ctx) + assert len(log.lines) == 4 + assert len(list(log.get_after(datetime(2020, 5, 28, 19, 25, 46, 944)))) == 3