From 64760119e5990b99921f3176890d4d2a4f8537ce Mon Sep 17 00:00:00 2001 From: Xiangce Liu Date: Thu, 2 Jan 2025 12:17:15 +0800 Subject: [PATCH] refactor: split spec_cleaner to multiple sub-modules This is preparation for supporting IPv6 and MAC address obfuscation. Splitting it into multiple sub-modules makes it easier to add new modules, e.g. IPv6 and MAC obfuscation. Jira: RHINENG-15077 - replace `get_obfuscate_functions` with a new keyword argument `width`. For specs that need to keep the original word width, `width` must be set by hand before content cleaning. In the future, it will be moved to the RegistryPoint so that users can set it per spec. - the processing of "password" and "keyword" is a kind of "obfuscation" that replaces potential `password value`s and the specified `keywords` with fixed strings/words, so it is moved to the obfuscation module. Users can exclude it by specifying, e.g., "no_obfuscate=['password']" when adding the RegistryPoint in `insights.specs.Specs`. 
Jira: RHINENG-14756 Signed-off-by: Xiangce Liu --- docs/api_index.rst | 68 ++- insights/cleaner/__init__.py | 218 +++++++ insights/cleaner/filters.py | 34 ++ insights/cleaner/hostname.py | 136 +++++ insights/cleaner/ip.py | 161 ++++++ insights/cleaner/keyword.py | 69 +++ insights/cleaner/mac.py | 14 + insights/cleaner/password.py | 40 ++ insights/cleaner/pattern.py | 34 ++ insights/cleaner/utilities.py | 28 + insights/client/connection.py | 11 +- insights/collect.py | 52 +- insights/core/spec_cleaner.py | 546 ------------------ insights/core/spec_factory.py | 16 +- insights/specs/__init__.py | 10 +- .../spec_cleaner => cleaner}/__init__.py | 0 .../cleaner/test_clean_content_filters.py | 43 ++ .../cleaner/test_clean_content_hostname.py | 58 ++ .../tests/cleaner/test_clean_content_ip.py | 141 +++++ .../cleaner/test_clean_content_keyword.py | 46 ++ .../tests/cleaner/test_clean_content_mult.py | 29 + .../cleaner/test_clean_content_password.py | 43 ++ .../cleaner/test_clean_content_pattern.py | 132 +++++ insights/tests/cleaner/test_clean_file.py | 79 +++ .../tests/cleaner/test_clean_file_keyword.py | 87 +++ .../tests/cleaner/test_clean_file_password.py | 41 ++ .../tests/cleaner/test_clean_file_pattern.py | 118 ++++ .../spec_cleaner => cleaner}/test_reports.py | 50 +- .../core/spec_cleaner/test_clean_content.py | 120 ---- .../spec_cleaner/test_clean_file_obfuscate.py | 276 --------- .../spec_cleaner/test_clean_file_redact.py | 317 ---------- .../test_get_obfuscate_functions.py | 33 -- insights/tests/specs/test_specs.py | 4 +- .../test_specs_content_redaction_empty.py | 2 +- insights/tests/specs/test_specs_filters.py | 4 +- .../test_specs_runtime_ds_obfuscation.py | 2 +- insights/tests/specs/test_specs_save_as.py | 4 +- .../tests/specs/test_specs_special_content.py | 2 +- 38 files changed, 1710 insertions(+), 1358 deletions(-) create mode 100644 insights/cleaner/__init__.py create mode 100644 insights/cleaner/filters.py create mode 100644 insights/cleaner/hostname.py 
create mode 100644 insights/cleaner/ip.py create mode 100644 insights/cleaner/keyword.py create mode 100644 insights/cleaner/mac.py create mode 100644 insights/cleaner/password.py create mode 100644 insights/cleaner/pattern.py create mode 100644 insights/cleaner/utilities.py delete mode 100644 insights/core/spec_cleaner.py rename insights/tests/{core/spec_cleaner => cleaner}/__init__.py (100%) create mode 100644 insights/tests/cleaner/test_clean_content_filters.py create mode 100644 insights/tests/cleaner/test_clean_content_hostname.py create mode 100644 insights/tests/cleaner/test_clean_content_ip.py create mode 100644 insights/tests/cleaner/test_clean_content_keyword.py create mode 100644 insights/tests/cleaner/test_clean_content_mult.py create mode 100644 insights/tests/cleaner/test_clean_content_password.py create mode 100644 insights/tests/cleaner/test_clean_content_pattern.py create mode 100644 insights/tests/cleaner/test_clean_file.py create mode 100644 insights/tests/cleaner/test_clean_file_keyword.py create mode 100644 insights/tests/cleaner/test_clean_file_password.py create mode 100644 insights/tests/cleaner/test_clean_file_pattern.py rename insights/tests/{core/spec_cleaner => cleaner}/test_reports.py (77%) delete mode 100644 insights/tests/core/spec_cleaner/test_clean_content.py delete mode 100644 insights/tests/core/spec_cleaner/test_clean_file_obfuscate.py delete mode 100644 insights/tests/core/spec_cleaner/test_clean_file_redact.py delete mode 100644 insights/tests/core/spec_cleaner/test_get_obfuscate_functions.py diff --git a/docs/api_index.rst b/docs/api_index.rst index da0a097d89..acae7e8985 100644 --- a/docs/api_index.rst +++ b/docs/api_index.rst @@ -71,13 +71,77 @@ insights.core.spec_factory :undoc-members: insights.core.taglang --------------------------- +--------------------- .. automodule:: insights.core.taglang :members: :show-inheritance: :undoc-members: +insights.cleaner +---------------- + +.. 
automodule:: insights.cleaner + :members: + :show-inheritance: + :undoc-members: + +insights.cleaner.filters +------------------------ + +.. automodule:: insights.cleaner.filters + :members: + :show-inheritance: + :undoc-members: + +insights.cleaner.hostname +------------------------- + +.. automodule:: insights.cleaner.hostname + :members: + :show-inheritance: + :undoc-members: + +insights.cleaner.ip +------------------- + +.. automodule:: insights.cleaner.ip + :members: + :show-inheritance: + :undoc-members: + +insights.cleaner.keyword +------------------------ + +.. automodule:: insights.cleaner.keyword + :members: + :show-inheritance: + :undoc-members: + +insights.cleaner.mac +-------------------- + +.. automodule:: insights.cleaner.mac + :members: + :show-inheritance: + :undoc-members: + +insights.cleaner.password +------------------------- + +.. automodule:: insights.cleaner.password + :members: + :show-inheritance: + :undoc-members: + +insights.cleaner.pattern +------------------------ + +.. automodule:: insights.cleaner.pattern + :members: + :show-inheritance: + :undoc-members: + insights.parsers ---------------- @@ -208,4 +272,4 @@ insights .. automodule:: insights.collect :members: default_manifest, collect :show-inheritance: - :undoc-members: \ No newline at end of file + :undoc-members: diff --git a/insights/cleaner/__init__.py b/insights/cleaner/__init__.py new file mode 100644 index 0000000000..fd99cf691c --- /dev/null +++ b/insights/cleaner/__init__.py @@ -0,0 +1,218 @@ +""" +Clean Specs (files/commands) +============================ + +The following modules are provided in the Cleaner and can be applied to the +specs during collection according to the user configuration and specs setting. + +- Redaction (patterns redaction) + This is a must-be-done operation to all the collected specs. A `no_redact` + option is available to specs, if it's surely contains non-security + information, e.g. the `machine-id` spec. 
+ +- Filtering + Filter lines as per the allow list got from the `filters.yaml`. The + `filtering` can only be applied when `allowlist` is available (not None) for + the spec. + +- Obfuscation (IPv4, [IPv6], Hostname, MAC, Password, Keywords) + Obfuscate lines in spec content according to the user configuration and + specs requirement. The `no_obfuscate` can be used to exclude obfuscation + target from the obfuscation. Currently, the supported obfuscation target + are: + * hostname + * ip (ipv4) + * ipv6 + * keyword + * mac + * password +""" + +import logging +import json +import os +import six + +from insights.cleaner.filters import AllowFilter +from insights.cleaner.hostname import Hostname +from insights.cleaner.ip import IPv4 # IPv6 +from insights.cleaner.keyword import Keyword + +# from insights.cleaner.mac import Mac +from insights.cleaner.password import Password +from insights.cleaner.pattern import Pattern +from insights.cleaner.utilities import write_report +from insights.util.hostname import determine_hostname +from insights.util.posix_regex import replace_posix + +logger = logging.getLogger(__name__) +DEFAULT_OBFUSCATIONS = { + 'hostname', + 'ip', # ipv4 + 'ipv6', + 'keyword', + 'mac', + 'password', +} + + +class Cleaner(object): + def __init__(self, config, rm_conf, fqdn=None): + self.report_dir = '/tmp' # FIXME + self.rhsm_facts_file = getattr( + config, 'rhsm_facts_file', os.path.join(self.report_dir, 'insights-client.facts') + ) + # Handle User Configuration + rm_conf = rm_conf or {} + exclude = rm_conf.get('patterns', []) + regex = False + if isinstance(exclude, dict) and exclude.get('regex'): + exclude = [r'%s' % replace_posix(i) for i in exclude['regex']] + regex = True + # - Pattern redaction and allow-list filter + self.redact = { + 'pattern': Pattern(exclude, regex) if exclude else None, + 'allow_filter': AllowFilter(), + } + # - Keyword and Password replacement + keywords = rm_conf.get('keywords') + self.obfuscate = { + 'keyword': 
Keyword(keywords) if keywords else None, + 'password': Password(), + } + + self.fqdn = fqdn if fqdn else determine_hostname() + if config and config.obfuscate: + # - IPv4 obfuscation + self.obfuscate.update(ip=IPv4()) + # # - IPv6 obfuscation + # self.obfuscate.update(ipv6=IPv6()) if config.obfuscate_ipv6 else None + # - Hostname obfuscation + ( + self.obfuscate.update(hostname=Hostname(self.fqdn)) + if config.obfuscate_hostname + else None + ) + # # - MAC obfuscation + # self.obfuscate.update(mac=Mac()) if config.obfuscate_mac else None + + def clean_content(self, lines, no_obfuscate=None, no_redact=False, allowlist=None, width=False): + """ + Clean lines one by one according to the configuration, the cleaned + lines will be returned. + """ + + def _clean_line(line): + for parser, kwargs in parsers: + line = parser.parse_line(line, **kwargs) + return line + + # List of parsers to be applied with Order + parsers = list() + # 1. Redact when NO "no_redact=True" is set + if self.redact['pattern'] and not no_redact: + parsers.append((self.redact['pattern'], {})) if not no_redact else None + # 2. Filter as per allowlist got from add_filter + ( + parsers.append((self.redact['allow_filter'], {'allowlist': allowlist})) + if allowlist is not None + else None + ) + # 3. 
Obfuscation entries + # - Hostname + # - IPv4 + # - IPv6 + # - Keyword + # - Mac + # - Password + no_obfuscate.append('ipv6') if no_obfuscate and 'ip' in no_obfuscate else None + for obf in set(self.obfuscate.keys()) - set(no_obfuscate or []): + if self.obfuscate[obf]: + parsers.append((self.obfuscate[obf], {'width': width})) + + # handle single string + if not isinstance(lines, list): + return _clean_line(lines) + + result = [] + for line in lines: + line = _clean_line(line) + result.append(line) if line is not None else None + if result and any(l for l in result): + # When there are some lines Truth + return result + # All lines blank + return [] + + def clean_file(self, _file, no_obfuscate=None, no_redact=False, allowlist=None): + """ + Clean a file according to the configuration, the file will be updated + directly with the cleaned content. + """ + logger.debug('Cleaning %s ...' % _file) + + if os.path.exists(_file) and not os.path.islink(_file): + # Process the file + raw_data = content = None + try: + with open(_file, 'r') as fh: + raw_data = fh.readlines() + content = self.clean_content( + raw_data, + no_obfuscate=no_obfuscate, + no_redact=no_redact, + allowlist=allowlist, + width=_file.endswith("netstat_-neopa"), + ) + except Exception as e: # pragma: no cover + logger.warning(e) + raise Exception("Error: Cannot Open File for Cleaning: %s" % _file) + # Store it + try: + if raw_data: + if content: + with open(_file, 'wb') as fh: + for line in content: + fh.write(line.encode('utf-8') if six.PY3 else line) + else: + # Remove Empty file + logger.debug('Removing %s, as it\'s empty after cleaning' % _file) + os.remove(_file) + except Exception as e: # pragma: no cover + logger.warning(e) + raise Exception("Error: Cannot Write to File: %s" % _file) + + def generate_rhsm_facts(self): + logger.info('Writing RHSM facts to %s ...', self.rhsm_facts_file) + + hostname = self.obfuscate.get('hostname') + hn_mapping = hostname.mapping() if hostname else [] + + keyword = 
self.obfuscate.get('keyword') + kw_mapping = keyword.mapping() if keyword else [] + + ipv4 = self.obfuscate.get('ip') + ipv4_mapping = ipv4.mapping() if ipv4 else [] + + facts = { + 'insights_client.hostname': self.fqdn, + 'insights_client.obfuscate_ip_enabled': 'ip' in self.obfuscate, + # 'insights_client.obfuscate_ipv6_enabled': 'ipv6' in self.obfuscate, + # 'insights_client.obfuscate_mac_enabled': 'mac' in self.obfuscate, + 'insights_client.obfuscate_hostname_enabled': 'hostname' in self.obfuscate, + 'insights_client.obfuscated_ipv4': json.dumps(ipv4_mapping), + # 'insights_client.obfuscated_ipv6': json.dumps(), + # 'insights_client.obfuscated_mac': json.dumps(), + 'insights_client.obfuscated_keyword': json.dumps(kw_mapping), + 'insights_client.obfuscated_hostname': json.dumps(hn_mapping), + } + + write_report(facts, self.rhsm_facts_file) + + def generate_report(self, archive_name): + # Always generate the rhsm.facts files + self.generate_rhsm_facts() + # Generate CSV reports accordingly + for parser in list(self.redact.values()) + list(self.obfuscate.values()): + if parser: + parser.generate_report(self.report_dir, archive_name) diff --git a/insights/cleaner/filters.py b/insights/cleaner/filters.py new file mode 100644 index 0000000000..2b69a35530 --- /dev/null +++ b/insights/cleaner/filters.py @@ -0,0 +1,34 @@ +""" +Filtering +========= +""" + +import logging + +logger = logging.getLogger(__name__) + + +class AllowFilter(object): + """ + Class for filtering per allow list. 
+ """ + + def parse_line(self, line, **kwargs): + # filter line as per the allow list specified by plugins + if not line: + return line + allowlist = kwargs.get('allowlist', {}) + if allowlist: + for a_key in list(allowlist.keys()): + # keep line when any filter match + # FIXME: + # Considering performance, didn't handle multiple filters in one same line + if a_key in line: + allowlist[a_key] -= 1 + # stop checking it when enough lines contain the key were found + allowlist.pop(a_key) if allowlist[a_key] == 0 else None + return line + # discard line when none filters found + + def generate_report(self, report_dir, archive_name): + pass # pragma: no cover diff --git a/insights/cleaner/hostname.py b/insights/cleaner/hostname.py new file mode 100644 index 0000000000..0f56f634f1 --- /dev/null +++ b/insights/cleaner/hostname.py @@ -0,0 +1,136 @@ +""" +Hostname Obfuscation +==================== +""" + +import logging +import hashlib +import os +import re +import six + +from insights.cleaner.utilities import write_report + +logger = logging.getLogger(__name__) + + +class Hostname(object): + """ + Class for obfuscating hostname. + + .. 
note:: + + Currently, only the system hostname will be obfuscated, see: + - https://docs.redhat.com/en/documentation/red_hat_insights/1-latest/html/client_configuration_guide_for_red_hat_insights/assembly-client-data-obfuscation#proc-obfuscating-hostname_insights-cg-obfuscation + + """ + + def __init__(self, fqdn): + # - Hostname obfuscate information + self.fqdn = fqdn + name_list = self.fqdn.split('.') + self.hostname = name_list[0] + self.domain = None if len(name_list) <= 1 else '.'.join(name_list[1:]) + + # - Hostname obfuscate information + self._hn_db = dict() # hostname database + self.hostname_count = 0 + self.obfuscated_domain = 'example.com' + + # - Domain name obfuscate information + self.dn_db = dict() # domain name database + self.domain_count = 0 + + self._domains2db() + self.obfuscated_fqdn = '{0}.example.com'.format( + hashlib.sha1(self.fqdn.encode('utf-8') if six.PY3 else self.fqdn).hexdigest()[:12] + ) + self.hostname_count += 1 + self._hn_db[self.obfuscated_fqdn] = self.fqdn + + def _domains2db(self): + # adds any additional domainnames to the domain database to be searched for + try: + # we will add the root domain for an FQDN as well. 
+ if self.domain is not None: + self.dn_db[self.obfuscated_domain] = self.domain + logger.debug( + "Obfuscated Domain Created - %s -> %s" % (self.domain, self.obfuscated_domain) + ) + + self.domain_count = len(self.dn_db) + return True + except Exception as e: # pragma: no cover + logger.warning(e) + + def _hn2db(self, hn): + ''' + This will add a hostname for a hostname for an included domain or return an existing entry + ''' + db = self._hn_db + hn_found = False + for k, v in db.items(): + if v == hn: # the hostname is in the database + ret_hn = k + hn_found = True + if hn_found: + return ret_hn + else: + # we have a new hostname, so we increment the counter to get the host ID number + self.hostname_count += 1 + o_domain = self.obfuscated_domain + for od, d in self.dn_db.items(): + if d in hn: # pragma: no cover # never false + o_domain = od + new_hn = "host%s.%s" % (self.hostname_count, o_domain) + self._hn_db[new_hn] = hn + return new_hn + + def parse_line(self, line, **kwargs): + ''' + This will replace the exact hostname and all instances of the domain name with the obfuscated alternatives. 
+ Example: + ''' + if not line: + return line + try: + for od, d in self.dn_db.items(): + # regex = re.compile(r'\w*\.%s' % d) + regex = re.compile(r'(?![\W\-\:\ \.])[a-zA-Z0-9\-\_\.]*\.%s' % d) + hostnames = [each for each in regex.findall(line)] + if len(hostnames) > 0: + for hn in hostnames: + new_hn = self._hn2db(hn) + logger.debug("Obfuscating FQDN - %s > %s", hn, new_hn) + line = line.replace(hn, new_hn) + if self.hostname: # pragma: no cover # never false + # catch any non-fqdn instances of the system hostname + line = line.replace(self.hostname, self._hn2db(self.fqdn)) + return line + except Exception as e: # pragma: no cover + logger.warning(e) + raise Exception('SubHostnameError: Unable to Substitute Hostname/Domainname') + + def mapping(self): + mapping = [] + for k, v in self._hn_db.items(): + mapping.append({'original': v, 'obfuscated': k}) + return mapping + + def generate_report(self, report_dir, archive_name): + try: + hn_report_file = os.path.join(report_dir, "%s-hostname.csv" % archive_name) + logger.info('Creating Hostname Report - %s', hn_report_file) + lines = ['Obfuscated Hostname,Original Hostname'] + if self.hostname_count > 0: + for k, v in self._hn_db.items(): + lines.append('{0},{1}'.format(k, v)) + else: # pragma: no cover # never false + lines.append('None,None') + except Exception as e: # pragma: no cover + logger.exception(e) + raise Exception('CreateReport Error: Error Creating Hostname Report') + + write_report(lines, hn_report_file) + + logger.info('Completed Hostname Report.') diff --git a/insights/cleaner/ip.py b/insights/cleaner/ip.py new file mode 100644 index 0000000000..7fa376c411 --- /dev/null +++ b/insights/cleaner/ip.py @@ -0,0 +1,161 @@ +""" +IP Obfuscation +============== +The following cleaners are included in this module: + +IPv4 Obfuscation +---------------- + +IPv6 Obfuscation +---------------- + +""" + +import logging +import os +import re +import socket +import struct + +from insights.cleaner.utilities import 
write_report + +logger = logging.getLogger(__name__) + + +class IPv4(object): + """ + Class for obfuscating IPv4. + """ + + def __init__(self): + # - IP obfuscate information + self._ip_db = dict() # IP database + self.start_ip = '10.230.230.1' + self.ignore_list = ["127.0.0.1"] + + def _ip2int(self, ipstr): + # converts a dotted decimal IP address into an integer that can be incremented + return struct.unpack('!I', socket.inet_aton(ipstr))[0] + + def _int2ip(self, num): + # converts an integer stored in the IP database into a dotted decimal IP + return socket.inet_ntoa(struct.pack('!I', num)) + + def _ip2db(self, ip): + ''' + adds an IP address to the IP database and returns the obfuscated entry, or returns the + existing obfuscated IP entry + FORMAT: + {$obfuscated_ip: $original_ip,} + ''' + ip_num = self._ip2int(ip) + ip_found = False + db = self._ip_db + for k, v in db.items(): + if v == ip_num: + ret_ip = self._int2ip(k) + ip_found = True + if ip_found: # the entry already existed + return ret_ip + else: # the entry did not already exist + if len(self._ip_db) > 0: + new_ip = max(db.keys()) + 1 + else: + new_ip = self._ip2int(self.start_ip) + db[new_ip] = ip_num + + return self._int2ip(new_ip) + + def parse_line(self, line, **kwargs): + ''' + This will substitute an obfuscated IP for each instance of a given IP in a file + This is called in the self.clean_* function, along with user _sub_* functions to scrub a given + line in a file. 
+ It scans a given line and if an IP exists, it obfuscates the IP using _ip2db and returns the altered line + ''' + + def _sub_ip(line, ip): + new_ip = self._ip2db(ip) + logger.debug("Obfuscating IP - %s > %s", ip, new_ip) + return line.replace(ip, new_ip) + + def _sub_ip_keep_width(line, ip): + ip_len = len(ip) + new_ip = self._ip2db(ip) + new_ip_len = len(new_ip) + logger.debug("Obfuscating IP - %s > %s", ip, new_ip) + # pad or remove spaces to allow for the new length + if ip_len > new_ip_len: + numspaces = ip_len - new_ip_len + line = line.replace(ip, new_ip) + # shift past port specification to add spaces + idx = line.index(new_ip) + new_ip_len + c = line[idx] + while c != " ": + idx += 1 + if idx == len(line): + idx = len(line) - 1 + break + c = line[idx] + return line[0:idx] + numspaces * " " + line[idx:] + elif new_ip_len > ip_len: + numspaces = new_ip_len - ip_len + line = line.replace(ip, new_ip) + # shift past port specification to skip spaces + idx = line.index(new_ip) + new_ip_len + c = line[idx] + while c != " ": + idx += 1 + if idx == len(line): + break + c = line[idx] + return line[0:idx] + line[(idx + numspaces) :] + else: + return line.replace(ip, new_ip) + + if not line: + return line + try: + pattern = r"(((\b25[0-5]|\b2[0-4][0-9]|\b1[0-9][0-9]|\b[1-9][0-9]|\b[1-9]))(\.(\b25[0-5]|\b2[0-4][0-9]|\b1[0-9][0-9]|\b[1-9][0-9]|\b[0-9])){3})" + ips = [each[0] for each in re.findall(pattern, line)] + if len(ips) > 0: + for ip in sorted(ips, key=len, reverse=True): + if ip not in self.ignore_list and ip in line: + if kwargs.get('width', False): + line = _sub_ip_keep_width(line, ip) + else: + line = _sub_ip(line, ip) + return line + except Exception as e: # pragma: no cover + logger.warning(e) + raise Exception('SubIPError: Unable to Substitute IP Address - %s', ips) + + def mapping(self): + mapping = [] + for k, v in self._ip_db.items(): + mapping.append({'original': self._int2ip(v), 'obfuscated': self._int2ip(k)}) + return mapping + + def 
generate_report(self, report_dir, archive_name): + try: + ip_report_file = os.path.join(report_dir, "%s-ip.csv" % archive_name) + logger.info('Creating IP Report - %s', ip_report_file) + lines = ['Obfuscated IP,Original IP'] + for k, v in self._ip_db.items(): + lines.append('{0},{1}'.format(self._int2ip(k), self._int2ip(v))) + except Exception as e: # pragma: no cover + logger.exception(e) + raise Exception('CreateReport Error: Error Creating IP Report') + + write_report(lines, ip_report_file) + + logger.info('Completed IP Report.') + + +# TODO +class IPv6(object): + """ + Class for obfuscating IPv6. + """ + + pass diff --git a/insights/cleaner/keyword.py b/insights/cleaner/keyword.py new file mode 100644 index 0000000000..364cd2bcba --- /dev/null +++ b/insights/cleaner/keyword.py @@ -0,0 +1,69 @@ +""" +Keyword replacement +=================== +""" + +import logging +import os + +from insights.cleaner.utilities import write_report + +logger = logging.getLogger(__name__) + + +class Keyword(object): + """ + Class to replace the keyword specified by users to "keyword#" + """ + + def __init__(self, keywords=None): + # - Keyword replacement redact information + # Keyword replacement does NOT depend on "obfuscate=True" + self._kw_key = "keyword" + self._kw_db = dict() # keyword database + self.obfuscated = set() # keywords that have been replaced + self._keywords2db(keywords) + + def _keywords2db(self, keywords): + # processes optional keywords to add to be obfuscated + try: + if keywords: + k_count = 0 + for keyword in keywords: + keyword = keyword.strip() + o_kw = "{0}{1}".format(self._kw_key, k_count) + self._kw_db[keyword] = o_kw + logger.debug("Added Obfuscated Keyword - %s", o_kw) + k_count += 1 + logger.debug("Added All keyword Contents from Customer's configuration") + except Exception as e: # pragma: no cover + logger.warning(e) + + def parse_line(self, line, **kwargs): + if not line: + return line + for k, v in self._kw_db.items(): + if k in line: + 
logger.debug("Replacing Keyword - %s > %s", k, v) + line = line.replace(k, v) + self.obfuscated.add(k) + return line + + def mapping(self): + mapping = [] + for k in self.obfuscated: + mapping.append({'original': k, 'obfuscated': self._kw_db[k]}) + return mapping + + def generate_report(self, report_dir, archive_name): + try: + kw_report_file = os.path.join(report_dir, "%s-keyword.csv" % archive_name) + logger.info('Creating Keyword Report - %s', kw_report_file) + lines = ['Replaced Keyword,Original Keyword'] + for k in self.obfuscated: + lines.append('{0},{1}'.format(k, self._kw_db[k])) + except Exception as e: # pragma: no cover + logger.exception(e) + raise Exception('CreateReport Error: Error Creating Keyword Report') + write_report(lines, kw_report_file) + logger.info('Completed Keyword Report.') diff --git a/insights/cleaner/mac.py b/insights/cleaner/mac.py new file mode 100644 index 0000000000..ad89179cde --- /dev/null +++ b/insights/cleaner/mac.py @@ -0,0 +1,14 @@ +""" +MAC Obfuscation +=============== +""" + +import logging + + +logger = logging.getLogger(__name__) + + +# TODO +class Mac(object): + pass diff --git a/insights/cleaner/password.py b/insights/cleaner/password.py new file mode 100644 index 0000000000..b36f029416 --- /dev/null +++ b/insights/cleaner/password.py @@ -0,0 +1,40 @@ +""" +Password Replacement +==================== + +""" + +import logging +import re + +logger = logging.getLogger(__name__) + +DEFAULT_PASSWORD_REGEXS = [ + r"(password[a-zA-Z0-9_]*)(\s*\:\s*\"*\s*|\s*\"*\s*=\s*\"\s*|\s*=+\s*|\s*--md5+\s*|\s*)([a-zA-Z0-9_!@#$%^&*()+=/-]+)", + r"(password[a-zA-Z0-9_]*)(\s*\*+\s+)(.+)", +] + + +class Password(object): + """ + Class to replace the possible password to "********". + + .. note:: + + Currently, the "password" is the only keyword to check for potential + Password. 
+ """ + + def parse_line(self, line, **kwargs): + if not line: + return line + # password obfuscation + for regex in DEFAULT_PASSWORD_REGEXS: + tmp_line = line + line = re.sub(regex, r"\1\2********", tmp_line) + if line != tmp_line: + break + return line + + def generate_report(self, report_dir, archive_name): + pass diff --git a/insights/cleaner/pattern.py b/insights/cleaner/pattern.py new file mode 100644 index 0000000000..54fec3e178 --- /dev/null +++ b/insights/cleaner/pattern.py @@ -0,0 +1,34 @@ +""" +Pattern redaction +================= +""" + +import logging +import re + +logger = logging.getLogger(__name__) + + +class Pattern(object): + """ + Class for redacting "patterns" configured in "file-content.redaction.yaml". + """ + + def __init__(self, exclude, regex=False): + self.exclude = exclude or [] + self.regex = regex + + def parse_line(self, line, **kwargs): + # redact line per the file-content-redaction.yaml + if not line: + return line + # patterns removal + find = re.search if self.regex else lambda x, y: x in y + if any(find(pat, line) for pat in self.exclude): + logger.debug("Pattern matched, removing line: %s" % line.strip()) + # patterns found, remove it + return None + return line + + def generate_report(self, report_dir, archive_name): + pass # pragma: no cover diff --git a/insights/cleaner/utilities.py b/insights/cleaner/utilities.py new file mode 100644 index 0000000000..b318d3437a --- /dev/null +++ b/insights/cleaner/utilities.py @@ -0,0 +1,28 @@ +""" +Utilities for spec Cleaner +========================== +""" + +import logging +import json +import os + +logger = logging.getLogger(__name__) + + +def write_report(report, report_file, mode=0o644): + # Get the current umask + umask = os.umask(0o022) + # Reset the umask + os.umask(umask) + try: + with open(report_file, 'w') as fp: + if isinstance(report, dict): + json.dump(report, fp) + elif isinstance(report, list): + for line in report: + fp.write("{0}\n".format(line)) + # Change the file mode 
per the current umask + os.chmod(report_file, mode & ~umask) + except (IOError, OSError) as e: # pragma: no cover + logger.error('Could not write to %s: %s', report_file, str(e)) diff --git a/insights/client/connection.py b/insights/client/connection.py index 656ba1b448..ad8000de6e 100644 --- a/insights/client/connection.py +++ b/insights/client/connection.py @@ -33,9 +33,8 @@ size_in_mb) from .cert_auth import rhsmCertificate from .constants import InsightsConstants as constants -from insights import package_info +from insights import cleaner, package_info from insights.client.collection_rules import InsightsUploadConf -from insights.core import spec_cleaner from insights.util.canonical_facts import get_canonical_facts warnings.simplefilter('ignore') @@ -1194,13 +1193,9 @@ def _deep_clean(data): for i, item in enumerate(data): data[i] = _deep_clean(item) elif isinstance(data, str): - return cleaner.clean_content( - data, - obf_funcs=obf_funcs, - no_redact=False) + return _cleaner.clean_content(data) return data # Clean (obfuscate and redact) the "c_facts" pc = InsightsUploadConf(self.config) - cleaner = spec_cleaner.Cleaner(self.config, pc.get_rm_conf()) - obf_funcs = cleaner.get_obfuscate_functions() + _cleaner = cleaner.Cleaner(self.config, pc.get_rm_conf()) return _deep_clean(cfacts) diff --git a/insights/collect.py b/insights/collect.py index d9418b239b..917a26065b 100755 --- a/insights/collect.py +++ b/insights/collect.py @@ -21,24 +21,26 @@ from insights import apply_configs from insights import apply_default_enabled from insights import get_pool +from insights.cleaner import Cleaner from insights.core import blacklist from insights.core import dr from insights.core import filters from insights.core.serde import Hydration -from insights.core.spec_cleaner import Cleaner from insights.util import fs from insights.util import utc from insights.util.hostname import determine_hostname from insights.util.subproc import call SAFE_ENV = { - "PATH": 
os.path.pathsep.join([ - "/bin", - "/usr/bin", - "/sbin", - "/usr/sbin", - "/usr/share/Modules/bin", - ]), + "PATH": os.path.pathsep.join( + [ + "/bin", + "/usr/bin", + "/sbin", + "/usr/sbin", + "/usr/share/Modules/bin", + ] + ), "LC_ALL": "C", } @@ -247,14 +249,12 @@ enabled: true """.strip() -EXCEPTIONS_TO_REPORT = set([ - OSError -]) +EXCEPTIONS_TO_REPORT = set([OSError]) """Exception types that should be reported on after core collection.""" def load_manifest(data): - """ Helper for loading a manifest yaml doc. """ + """Helper for loading a manifest yaml doc.""" if isinstance(data, dict): return data if os.path.isfile(data): @@ -338,6 +338,7 @@ def get_to_persist(persisters): Given a specification of what to persist, generates the corresponding set of components. """ + def specs(): for p in persisters: if isinstance(p, dict): @@ -385,8 +386,14 @@ def generate_archive_name(): return "insights-%s-%s" % (hostname, suffix) -def collect(client_config=None, rm_conf=None, tmp_path=None, - archive_name=None, compress=False, manifest=None): +def collect( + client_config=None, + rm_conf=None, + tmp_path=None, + archive_name=None, + compress=False, + manifest=None, +): """ This is the collection entry point. It accepts a manifest, a temporary directory in which to store output, and a boolean for optional compression. 
@@ -520,7 +527,13 @@ def main(): # The main fxn is only invoked as a cli, if calling from another cli then # use the collect function instead collect_args = [arg for arg in sys.argv[1:]] if len(sys.argv) > 1 else [] - sys.argv = [sys.argv[0], ] if sys.argv else sys.argv + sys.argv = ( + [ + sys.argv[0], + ] + if sys.argv + else sys.argv + ) p = argparse.ArgumentParser() p.add_argument("-m", "--manifest", help="Manifest yaml.") @@ -542,9 +555,12 @@ def main(): logging.basicConfig(level=level) out_path = args.out_path or tempfile.gettempdir() - archive, errors = collect(manifest=args.manifest, tmp_path=out_path, - archive_name=generate_archive_name(), - compress=args.compress) + archive, errors = collect( + manifest=args.manifest, + tmp_path=out_path, + archive_name=generate_archive_name(), + compress=args.compress, + ) print(archive) diff --git a/insights/core/spec_cleaner.py b/insights/core/spec_cleaner.py deleted file mode 100644 index b1e210d178..0000000000 --- a/insights/core/spec_cleaner.py +++ /dev/null @@ -1,546 +0,0 @@ -""" -Clean the collected specs (files/commands/datasources) -====================================================== - -The following processes will be applied to clean the collected specs: - -- Redaction - This is a must-be-done operation to all the collected specs. - -- Obfuscation - Obfuscate the IP or Hostname appears in the spec content according to the - specs native requirement and user configuration. 
- -- Filtering - Filter line as per the allow list got from the "filters.yaml" -""" - -import logging -import hashlib -import json -import os -import re -import six -import socket -import struct - -from insights.util.hostname import determine_hostname -from insights.util.posix_regex import replace_posix - -logger = logging.getLogger(__name__) - -DEFAULT_PASSWORD_REGEXS = [ - r"(password[a-zA-Z0-9_]*)(\s*\:\s*\"*\s*|\s*\"*\s*=\s*\"\s*|\s*=+\s*|\s*--md5+\s*|\s*)([a-zA-Z0-9_!@#$%^&*()+=/-]+)", - r"(password[a-zA-Z0-9_]*)(\s*\*+\s+)(.+)", -] -"""The regex for password removal, which is read from the "/etc/insights-client/.exp.sed".""" - - -def write_report(report, report_file, mode=0o644): - # Get the current umask - umask = os.umask(0o022) - # Reset the umask - os.umask(umask) - try: - with open(report_file, 'w') as fp: - if isinstance(report, dict): - json.dump(report, fp) - elif isinstance(report, list): - for line in report: - fp.write("{0}\n".format(line)) - # Change the file mode per the current umask - os.chmod(report_file, mode & ~umask) - except (IOError, OSError) as e: - logger.error('Could not write to %s: %s', report_file, str(e)) - - -class Cleaner(object): - def __init__(self, config, rm_conf, fqdn=None): - self.report_dir = '/tmp' - self.rhsm_facts_file = getattr( - config, 'rhsm_facts_file', os.path.join(self.report_dir, 'insights-client.facts') - ) - # Obfuscation - set: ip and hostname only - self.obfuscate = set() - self.obfuscate.add('ip') if config and config.obfuscate else None - self.obfuscate.add('hostname') if config and config.obfuscate_hostname else None - - # File Content Redaction - # - Pattern redaction - rm_conf = rm_conf or {} - exclude = rm_conf.get('patterns', []) - regex = False - if isinstance(exclude, dict) and exclude.get('regex'): - exclude = [r'%s' % replace_posix(i) for i in exclude['regex']] - regex = True - self.redact = dict(exclude=exclude, regex=regex) - - # - Keyword replacement redact information - # Keyword replacement 
does NOT depend on "obfuscate=True" - keywords = rm_conf.get('keywords') - self.kw_db = dict() # keyword database - self.kws = set() # keywords that have been replaced - self._keywords2db(keywords) - - # Obfuscation - fqdn = fqdn if fqdn else determine_hostname() - name_list = fqdn.split('.') - self.hostname = name_list[0] - self.fqdn = fqdn - self.domain = None if len(name_list) <= 1 else '.'.join(name_list[1:]) - - # - IP obfuscate information - self.ip_db = dict() # IP database - self.start_ip = '10.230.230.1' - - # - Hostname obfuscate information - self.hn_db = dict() # hostname database - self.hostname_count = 0 - self.obfuscated_fqdn = None - self.obfuscated_domain = 'example.com' # right now this needs to be a 2nd level domain, like foo.com, example.com, domain.org, etc. - - # - Domain name obfuscate information - self.dn_db = dict() # domain name database - self.domain_count = 0 - - if config and config.obfuscate_hostname and self.fqdn: - self._domains2db() - hashed_hostname = hashlib.sha1( - self.fqdn.encode('utf-8') if six.PY3 else self.fqdn - ).hexdigest()[:12] - self.obfuscated_fqdn = '{0}.example.com'.format(hashed_hostname) - self.hostname_count += 1 - self.hn_db[self.obfuscated_fqdn] = self.fqdn - # per https://access.redhat.com/documentation/en-us/red_hat_insights/2023/html/client_configuration_guide_for_red_hat_insights/con-insights-client-cg-data-obfuscation_insights-cg-obfuscation#proc-obfuscating-hostname_insights-cg-obfuscation - # only `hostname` is obfuscated - - ########################### - # IP functions # - ########################### - - def _ip2int(self, ipstr): - # converts a dotted decimal IP address into an integer that can be incremented - return struct.unpack('!I', socket.inet_aton(ipstr))[0] - - def _int2ip(self, num): - # converts an integer stored in the IP database into a dotted decimal IP - return socket.inet_ntoa(struct.pack('!I', num)) - - def _ip2db(self, ip): - ''' - adds an IP address to the IP database and returns the 
obfuscated entry, or returns the - existing obfuscated IP entry - FORMAT: - {$obfuscated_ip: $original_ip,} - ''' - ip_num = self._ip2int(ip) - ip_found = False - db = self.ip_db - for k, v in db.items(): - if v == ip_num: - ret_ip = self._int2ip(k) - ip_found = True - if ip_found: # the entry already existed - return ret_ip - else: # the entry did not already exist - if len(self.ip_db) > 0: - new_ip = max(db.keys()) + 1 - else: - new_ip = self._ip2int(self.start_ip) - db[new_ip] = ip_num - - return self._int2ip(new_ip) - - def _sub_ip(self, line): - ''' - This will substitute an obfuscated IP for each instance of a given IP in a file - This is called in the self.clean_* function, along with user _sub_* functions to scrub a given - line in a file. - It scans a given line and if an IP exists, it obfuscates the IP using _ip2db and returns the altered line - ''' - try: - pattern = r"(((\b25[0-5]|\b2[0-4][0-9]|\b1[0-9][0-9]|\b[1-9][0-9]|\b[1-9]))(\.(\b25[0-5]|\b2[0-4][0-9]|\b1[0-9][0-9]|\b[1-9][0-9]|\b[0-9])){3})" - ips = [each[0] for each in re.findall(pattern, line)] - if len(ips) > 0: - for ip in sorted(ips, key=len, reverse=True): - # skip loopback (https://github.com/RedHatInsights/insights-core/issues/3230#issuecomment-924859845) - if ip != "127.0.0.1" and ip in line: - new_ip = self._ip2db(ip) - logger.debug("Obfuscating IP - %s > %s", ip, new_ip) - line = line.replace(ip, new_ip) - return line - except Exception as e: # pragma: no cover - logger.warning(e) - raise Exception('SubIPError: Unable to Substitute IP Address - %s', ips) - - def _sub_ip_netstat(self, line): - ''' - Special version of _sub_ip for netstat to preserve spacing - ''' - try: - pattern = r"(((\b25[0-5]|\b2[0-4][0-9]|\b1[0-9][0-9]|\b[1-9][0-9]|\b[1-9]))(\.(\b25[0-5]|\b2[0-4][0-9]|\b1[0-9][0-9]|\b[1-9][0-9]|\b[0-9])){3})" - ips = [each[0] for each in re.findall(pattern, line)] - if len(ips) > 0: - for ip in sorted(ips, key=len, reverse=True): - # skip loopback 
(https://github.com/RedHatInsights/insights-core/issues/3230#issuecomment-924859845) - if ip != "127.0.0.1" and ip in line: - ip_len = len(ip) - new_ip = self._ip2db(ip) - new_ip_len = len(new_ip) - logger.debug("Obfuscating IP - %s > %s", ip, new_ip) - # pad or remove spaces to allow for the new length - if ip_len > new_ip_len: - numspaces = ip_len - new_ip_len - line = line.replace(ip, new_ip) - - # shift past port specification to add spaces - idx = line.index(new_ip) + new_ip_len - c = line[idx] - while c != " ": - idx += 1 - if idx == len(line): - idx = len(line) - 1 - break - c = line[idx] - line = line[0:idx] + numspaces * " " + line[idx:] - - elif new_ip_len > ip_len: - numspaces = new_ip_len - ip_len - line = line.replace(ip, new_ip) - - # shift past port specification to skip spaces - idx = line.index(new_ip) + new_ip_len - c = line[idx] - while c != " ": - idx += 1 - if idx == len(line): - break - c = line[idx] - line = line[0:idx] + line[(idx + numspaces) :] - - else: - line = line.replace(ip, new_ip) - return line - except Exception as e: # pragma: no cover - logger.warning(e) - raise Exception('SubIPError: Unable to Substitute IP Address - %s', ip) - - ############################# - # Hostname Domain Functions # - ############################# - - def _domains2db(self): - # adds any additional domainnames to the domain database to be searched for - try: - # we will add the root domain for an FQDN as well. 
- if self.domain is not None: - self.dn_db[self.obfuscated_domain] = self.domain - logger.debug( - "Obfuscated Domain Created - %s -> %s" % (self.domain, self.obfuscated_domain) - ) - - self.domain_count = len(self.dn_db) - return True - except Exception as e: # pragma: no cover - logger.warning(e) - - def _hn2db(self, hn): - ''' - This will add a hostname for a hostname for an included domain or return an existing entry - ''' - db = self.hn_db - hn_found = False - for k, v in db.items(): - if v == hn: # the hostname is in the database - ret_hn = k - hn_found = True - if hn_found: - return ret_hn - else: - # we have a new hostname, so we increment the counter to get the host ID number - self.hostname_count += 1 - o_domain = self.obfuscated_domain - for od, d in self.dn_db.items(): - if d in hn: # never false - o_domain = od - new_hn = "host%s.%s" % (self.hostname_count, o_domain) - self.hn_db[new_hn] = hn - return new_hn - - def _sub_hostname(self, line): - ''' - This will replace the exact hostname and all instances of the domain name with the obfuscated alternatives. 
- Example: - ''' - if not line: - return line - try: - for od, d in self.dn_db.items(): - # regex = re.compile(r'\w*\.%s' % d) - regex = re.compile(r'(?![\W\-\:\ \.])[a-zA-Z0-9\-\_\.]*\.%s' % d) - hostnames = [each for each in regex.findall(line)] - if len(hostnames) > 0: - for hn in hostnames: - new_hn = self._hn2db(hn) - logger.debug("Obfuscating FQDN - %s > %s", hn, new_hn) - line = line.replace(hn, new_hn) - if self.hostname: - # catch any non-fqdn instances of the system hostname - line = line.replace(self.hostname, self._hn2db(self.fqdn)) - return line - except Exception as e: # pragma: no cover - logger.warning(e) - raise Exception('SubHostnameError: Unable to Substitute Hostname/Domainname') - - ########################### - # Keyword functions # - ########################### - - def _keywords2db(self, keywords): - # processes optional keywords to add to be obfuscated - try: - if keywords: - k_count = 0 - for keyword in keywords: - o_kw = "keyword%s" % k_count - self.kw_db[keyword.rstrip()] = o_kw - logger.debug("Added Obfuscated Keyword - %s", o_kw) - k_count += 1 - logger.debug("Added Keyword Contents from Customer's configuration") - - except Exception as e: # pragma: no cover - logger.warning(e) - - def _sub_keywords(self, line): - # this will substitute out any keyword entries on a given line - if not line: - return line - for k, v in self.kw_db.items(): - if k in line: - line = line.replace(k, v) - self.kws.add(k) - logger.debug("Replacing Keyword - %s > %s", k, v) - return line - - ########################### - # Main functions # - ########################### - - def _obfuscate_line(self, line, obf_funcs): - # obfuscate line for possible hostname, ip - if not line: - return line - for func in obf_funcs: - tmp_line = func(line) - line = tmp_line - return line - - def _redact_line(self, line): - # redact line per the file-content-redaction.yaml - if not line: - return line - # 1. 
patterns removal - find = re.search if self.redact['regex'] else lambda x, y: x in y - if any(find(pat, line) for pat in self.redact.get('exclude', [])): - logger.debug("Pattern matched, removing line: %s" % line.strip()) - # patterns found, remove it - return None - # 2. password removal - for regex in DEFAULT_PASSWORD_REGEXS: - tmp_line = line - line = re.sub(regex, r"\1\2********", tmp_line) - if line != tmp_line: - break - # 3. keyword replacement redaction - return self._sub_keywords(line) - - def _filter_line_per_allowlist(self, line, allow_info): - # filter line as per the allow list specified by plugins - if not line: - return line - if allow_info: - for a_key in list(allow_info.keys()): - # keep line when any filter match - # FIXME: - # Considering performance, didn't handle multiple filters in one same line - if a_key in line: - allow_info[a_key] -= 1 - # stop checking it when enough lines contain the key were found - allow_info.pop(a_key) if allow_info[a_key] == 0 else None - return line - # discard line when none filters found - - def get_obfuscate_functions(self, filename='', no_obfuscate=None): - """ - Return the list of required obfuscation function according to the - filename and configuration. 
By default, it returns: - - [] when obfuscate=False - - [self._sub_ip] when obfuscate=True Only - - [self._sub_hostname, self._sub_ip] when obfuscate_hostname=True - """ - obf_funcs = [] - # Get the actual obfuscate list setting for this file - obfs = set(self.obfuscate) - set(no_obfuscate or []) - # IP obfuscation entry - ( - obf_funcs.append( - self._sub_ip_netstat if filename.endswith("netstat_-neopa") else self._sub_ip - ) - if "ip" in obfs - else None - ) - # Hostname obfuscation entry - obf_funcs.append(self._sub_hostname) if "hostname" in obfs else None - return obf_funcs - - def clean_content(self, lines, obf_funcs=None, no_redact=False, allowlist=None): - """ - Clean lines one by one according to the configuration, the cleaned - lines will be returned. - """ - - def _clean_line(_line): - # 1. Do Redaction by default, unless "no_redact=True" - if _line and not no_redact: - _line = self._redact_line(_line) - # 2. Do filtering as per allowlist got from "filters.yaml" - if _line and allowlist is not None: - _line = self._filter_line_per_allowlist(_line, allowlist) - # 3. Do Obfuscation as per the "obf_funcs" - return self._obfuscate_line(_line, obf_funcs or []) - - # handle single string - if not isinstance(lines, list): - return _clean_line(lines) - - result = [] - for line in lines: - line = _clean_line(line) - result.append(line) if line is not None else None - if result and list(filter(None, result)): - # When there are some lines Truth - return result - # All lines blank - return [] - - def clean_file(self, _file, no_obfuscate=None, no_redact=False, allowlist=None): - """ - Clean a file according to the configuration, the file will be updated - directly with the cleaned content. - """ - logger.debug('Cleaning %s ...' 
% _file) - - if os.path.exists(_file) and not os.path.islink(_file): - # Process the file - raw_data = content = None - obf_funcs = self.get_obfuscate_functions(_file, no_obfuscate) - try: - with open(_file, 'r') as fh: - raw_data = fh.readlines() - content = self.clean_content( - raw_data, - obf_funcs=obf_funcs, - no_redact=no_redact, - allowlist=allowlist, - ) - except Exception as e: # pragma: no cover - logger.warning(e) - raise Exception("Error: Cannot Open File for Cleaning: %s" % _file) - # Store it - try: - if raw_data: - if content: - with open(_file, 'wb') as fh: - for line in content: - fh.write(line.encode('utf-8') if six.PY3 else line) - else: - # Remove Empty file - logger.debug('Removing %s, as it\'s empty after cleaning' % _file) - os.remove(_file) - except Exception as e: # pragma: no cover - logger.warning(e) - raise Exception("Error: Cannot Write to File: %s" % _file) - - def generate_rhsm_facts(self): - logger.info('Writing RHSM facts to %s ...', self.rhsm_facts_file) - - hn_block = [] - for k, v in self.hn_db.items(): - hn_block.append({'original': v, 'obfuscated': k}) - - kw_block = [] - for k in self.kws: - kw_block.append({'original': k, 'obfuscated': self.kw_db[k]}) - - ip_block = [] - for k, v in self.ip_db.items(): - ip_block.append({'original': self._int2ip(v), 'obfuscated': self._int2ip(k)}) - - facts = { - 'insights_client.hostname': self.fqdn, - 'insights_client.obfuscate_ip_enabled': 'ip' in self.obfuscate, - 'insights_client.ips': json.dumps(ip_block), - 'insights_client.obfuscate_hostname_enabled': 'hostname' in self.obfuscate, - 'insights_client.hostnames': json.dumps(hn_block), - 'insights_client.keywords': json.dumps(kw_block), - } - - write_report(facts, self.rhsm_facts_file) - - def generate_ip_report(self, archive_name): - try: - ip_report_file = os.path.join(self.report_dir, "%s-ip.csv" % archive_name) - logger.info('Creating IP Report - %s', ip_report_file) - lines = ['Obfuscated IP,Original IP'] - for k, v in 
self.ip_db.items(): - lines.append('{0},{1}'.format(self._int2ip(k), self._int2ip(v))) - except Exception as e: # pragma: no cover - logger.exception(e) - raise Exception('CreateReport Error: Error Creating IP Report') - - write_report(lines, ip_report_file) - - logger.info('Completed IP Report.') - - def generate_hn_report(self, archive_name): - try: - hn_report_file = os.path.join(self.report_dir, "%s-hostname.csv" % archive_name) - logger.info('Creating Hostname Report - %s', hn_report_file) - lines = ['Obfuscated Hostname,Original Hostname'] - if self.hostname_count > 0: - for k, v in self.hn_db.items(): - lines.append('{0},{1}'.format(k, v)) - else: - lines.append('None,None') - except Exception as e: # pragma: no cover - logger.exception(e) - raise Exception('CreateReport Error: Error Creating Hostname Report') - - write_report(lines, hn_report_file) - - logger.info('Completed Hostname Report.') - - def generate_kw_report(self, archive_name): - try: - kw_report_file = os.path.join(self.report_dir, "%s-keyword.csv" % archive_name) - logger.info('Creating Keyword Report - %s', kw_report_file) - lines = ['Replaced Keyword,Original Keyword'] - for k in self.kws: - lines.append('{0},{1}'.format(k, self.kw_db[k])) - except Exception as e: # pragma: no cover - logger.exception(e) - raise Exception('CreateReport Error: Error Creating Keyword Report') - - write_report(lines, kw_report_file) - - logger.info('Completed Keyword Report.') - - def generate_report(self, archive_name): - # Always generate the rhsm.facts files - self.generate_rhsm_facts() - if 'ip' in self.obfuscate: - self.generate_ip_report(archive_name) - if 'hostname' in self.obfuscate: - self.generate_hn_report(archive_name) - if self.kws: - self.generate_kw_report(archive_name) diff --git a/insights/core/spec_factory.py b/insights/core/spec_factory.py index 6eaac2c70c..19674bb999 100644 --- a/insights/core/spec_factory.py +++ b/insights/core/spec_factory.py @@ -12,6 +12,7 @@ from glob import glob from 
subprocess import call +from insights.cleaner import DEFAULT_OBFUSCATIONS from insights.core import blacklist, dr, filters from insights.core.context import ExecutionContext, FSRoots, HostContext from insights.core.exceptions import ( @@ -79,8 +80,8 @@ def _stream(self): def _clean_content(self): """ - Clean (Obfuscate and Redact) the Spec Content ONLY when doing - collection. + Clean (Redact, Filter, and Obfuscate) the Spec Content ONLY when + collecting data. """ content = self.content # load first for debugging info order if content and isinstance(self.ctx, HostContext) and self.ds and self.cleaner: @@ -90,7 +91,7 @@ def _clean_content(self): cleans.append("Redact") if not no_red else None # Obfuscating? no_obf = getattr(self.ds, 'no_obfuscate', []) - cleans.append("Obfuscate") if len(no_obf) < 2 else None + cleans.append("Obfuscate") if set(no_obf) != DEFAULT_OBFUSCATIONS else None # Filtering? allowlist = None if self._filterable: @@ -101,9 +102,10 @@ def _clean_content(self): log.debug("Cleaning (%s) %s", "/".join(cleans), self.relative_path) content = self.cleaner.clean_content( content[::-1], # Scan from bottom + no_obfuscate=no_obf, allowlist=allowlist, - obf_funcs=self.cleaner.get_obfuscate_functions(self.relative_path, no_obf), no_redact=no_red, + width=self.relative_path.endswith("netstat_-neopa"), )[::-1] # ^ Reverse to the right order then if len(content) == 0: @@ -241,7 +243,11 @@ def __repr__(self): class RawFileProvider(FileProvider): """ Class used in datasources that returns the contents of a file a single - string. The file is not filtered/obfuscated/redacted. + string. + + .. note:: + + The content of RawFileProvider is not filtered/obfuscated/redacted. 
""" def load(self): diff --git a/insights/specs/__init__.py b/insights/specs/__init__.py index 8ada6a254c..ea55c01464 100644 --- a/insights/specs/__init__.py +++ b/insights/specs/__init__.py @@ -408,7 +408,7 @@ class Specs(SpecSet): lvs_noheadings = RegistryPoint(no_obfuscate=['hostname', 'ip']) lvs_noheadings_all = RegistryPoint(no_obfuscate=['hostname', 'ip']) mac_addresses = RegistryPoint(multi_output=True, no_obfuscate=['hostname', 'ip']) - machine_id = RegistryPoint(no_obfuscate=['hostname', 'ip'], no_redact=True) + machine_id = RegistryPoint(no_obfuscate=['hostname', 'ip', 'password'], no_redact=True) manila_conf = RegistryPoint(no_obfuscate=['hostname', 'ip']) mariadb_log = RegistryPoint(filterable=True) max_uid = RegistryPoint(no_obfuscate=['hostname', 'ip']) @@ -547,8 +547,8 @@ class Specs(SpecSet): pcp_metrics = RegistryPoint() pcp_openmetrics_log = RegistryPoint(filterable=True) pcp_raw_data = RegistryPoint( - raw=True, multi_output=True, no_obfuscate=['hostname', 'ip'], no_redact=True - ) # No Parser required + raw=True, multi_output=True + ) # No Parser required; Raw Spec will be not Obfuscated/Redacted/Filterd pcs_config = RegistryPoint() pcs_quorum_status = RegistryPoint() pcs_status = RegistryPoint() @@ -716,13 +716,13 @@ class Specs(SpecSet): sshd_config = RegistryPoint(filterable=True) sshd_config_d = RegistryPoint(multi_output=True, filterable=True) sshd_config_perms = RegistryPoint(no_obfuscate=['hostname', 'ip']) - sshd_test_mode = RegistryPoint(filterable=True, no_redact=True) + sshd_test_mode = RegistryPoint(filterable=True, no_obfuscate=['password']) sssd_config = RegistryPoint() sssd_conf_d = RegistryPoint(multi_output=True) sssd_logs = RegistryPoint(multi_output=True, filterable=True) sys_block_queue_stable_writes = RegistryPoint(multi_output=True) subscription_manager_facts = RegistryPoint(filterable=True) - subscription_manager_id = RegistryPoint(no_obfuscate=['ip'], no_redact=True) + subscription_manager_id = 
RegistryPoint(no_obfuscate=['ip']) subscription_manager_installed_product_ids = RegistryPoint( filterable=True, no_obfuscate=['hostname', 'ip'] ) diff --git a/insights/tests/core/spec_cleaner/__init__.py b/insights/tests/cleaner/__init__.py similarity index 100% rename from insights/tests/core/spec_cleaner/__init__.py rename to insights/tests/cleaner/__init__.py diff --git a/insights/tests/cleaner/test_clean_content_filters.py b/insights/tests/cleaner/test_clean_content_filters.py new file mode 100644 index 0000000000..0d0ca02494 --- /dev/null +++ b/insights/tests/cleaner/test_clean_content_filters.py @@ -0,0 +1,43 @@ +from pytest import mark + +from insights.cleaner import Cleaner +from insights.client.config import InsightsConfig + +test_data = 'testabc\nabcd\n \n\n1234\npwd: p4ssw0rd\ntest123\npwd:abc\n'.splitlines() + + +@mark.parametrize("obfuscate", [True, False]) +def test_clean_content_filters_allowlist(obfuscate): + conf = InsightsConfig(obfuscate=obfuscate) + + pp = Cleaner(conf, None) + ret = pp.clean_content(test_data, allowlist={'test': 2, 'pwd': 1}) + # content IS changed + assert test_data != ret + assert 'testabc' in ret # 1 of 2 matched + assert 'test123' in ret # 2 of 2 matched + assert 'pwd: p4ssw0rd' in ret # 1 of 1 matched + assert 'pwd:abc' not in ret # Max count matched + assert '1234' not in ret + assert 'abcd' not in ret + + +@mark.parametrize("obfuscate", [True, False]) +def test_clean_content_filters_allowlist_empty(obfuscate): + conf = InsightsConfig(obfuscate=obfuscate) + + pp = Cleaner(conf, None) + ret = pp.clean_content(test_data, allowlist={}) + # content IS changed + assert test_data != ret + assert ret == [] + + +@mark.parametrize("obfuscate", [True, False]) +def test_clean_content_filters_allowlist_not(obfuscate): + conf = InsightsConfig(obfuscate=obfuscate) + + pp = Cleaner(conf, None) + ret = pp.clean_content(test_data, allowlist=None) + # content IS NOT changed + assert test_data == ret diff --git 
a/insights/tests/cleaner/test_clean_content_hostname.py b/insights/tests/cleaner/test_clean_content_hostname.py new file mode 100644 index 0000000000..af73b95eaf --- /dev/null +++ b/insights/tests/cleaner/test_clean_content_hostname.py @@ -0,0 +1,58 @@ +from mock.mock import patch + +from insights.client.config import InsightsConfig +from insights.cleaner import Cleaner + + +def test_obfuscate_hostname(): + hostname = 'test1.abc.com' + line = "a line with %s here, test2.abc.com, test.redhat.com" % hostname + c = InsightsConfig(obfuscate=True, obfuscate_hostname=True, hostname=hostname) + pp = Cleaner(c, {}, hostname) + actual = pp.clean_content(line) + assert 'test1' not in actual + assert 'test2' not in actual + assert 'abc.com' not in actual + assert len(actual.split('.')[0].split()[-1]) == 12 + assert '.example.com' in actual + + line = "a line w/o hostname, but test2.abc.com only" + actual = pp.clean_content(line) + assert 'test2' not in actual + assert 'abc.com' not in actual + assert '.example.com' in actual + assert len(actual.split('.')[0].split()[-1]) != 12 + + hostname = 'test1' # Short hostname + line = "a line with %s here, test2.def.com" % hostname + pp = Cleaner(c, {}, hostname) + actual = pp.clean_content(line) + assert hostname not in actual + assert 'test2.def.com' in actual + + line = "a line w/o hostname" + hostname = 'test1.abc.com' + pp = Cleaner(c, {}, hostname) + actual = pp.clean_content(line) + assert line == actual + + +@patch("insights.cleaner.determine_hostname", return_value='test1.abc.com') +def test_obfuscate_hostname_determine_hostanme(hn): + hostname = 'test1.abc.com' + c = InsightsConfig(obfuscate=True, obfuscate_hostname=True, display_name='disp.abc.com') + line = "a line with %s here, test2.def.com" % hostname + pp = Cleaner(c, {}) # passed empty hostname to cleaner, determain it + actual = pp.clean_content(line) + assert hostname not in actual + assert len(actual.split('.')[0].split()[-1]) == 12 + assert 'test2.def.com' in 
actual + + +@patch("insights.cleaner.determine_hostname", return_value='test1.abc.com') +def test_obfuscate_hostname_empty_line(hn): + c = InsightsConfig(obfuscate=True, obfuscate_hostname=True, display_name='disp.abc.com') + line = "" + pp = Cleaner(c, {}) # passed empty hostname to cleaner, determain it + actual = pp.clean_content(line) + assert actual == line diff --git a/insights/tests/cleaner/test_clean_content_ip.py b/insights/tests/cleaner/test_clean_content_ip.py new file mode 100644 index 0000000000..a96e6076b4 --- /dev/null +++ b/insights/tests/cleaner/test_clean_content_ip.py @@ -0,0 +1,141 @@ +from mock.mock import patch +from pytest import mark + +from insights.client.config import InsightsConfig +from insights.cleaner import Cleaner + + +@mark.parametrize( + ("line", "expected"), + [ + ("test_no_ip", "test_no_ip"), + ("test 127.0.0.1", "test 127.0.0.1"), + ("radius_ip_1=10.0.0.1", "radius_ip_1=10.230.230.1"), + ( + ( + " inet 10.0.2.15" + " netmask 255.255.255.0" + " broadcast 10.0.2.255" + " dup 10.0.2.15" + ), + ( + " inet 10.230.230.3" + " netmask 10.230.230.1" + " broadcast 10.230.230.2" + " dup 10.230.230.3" + ), + ), + ( + ["inet 10.0.2.15", " netmask 255.255.255.0", " broadcast 10.0.2.255", "dup 10.0.2.15"], + [ + "inet 10.230.230.1", + " netmask 10.230.230.2", + " broadcast 10.230.230.3", + "dup 10.230.230.1", + ], + ), + ( + "radius_ip_1=10.0.0.100-10.0.0.200", + "radius_ip_1=10.230.230.1-10.230.230.2", + ), + ], +) +def test_obfuscate_ip_match(line, expected): + c = InsightsConfig(obfuscate=True) + pp = Cleaner(c, {}) + actual = pp.clean_content(line) + assert actual == expected + + +@mark.parametrize( + ("line", "expected"), + [ + ( + (" inet 10.0.2.155" " netmask 10.0.2.1" " broadcast 10.0.2.15"), + (" inet 10.230.230.1" " netmask 10.230.230.3" " broadcast 10.230.230.2"), + ), + ], +) +def test_obfuscate_ip_match_IP_overlap(line, expected): + c = InsightsConfig(obfuscate=True) + pp = Cleaner(c, {}) + actual = pp.clean_content(line) + 
assert actual == expected + + +@mark.parametrize( + ("line", "expected"), + [ + ("test_no_ip", "test_no_ip"), + ("test 127.0.0.1", "test 127.0.0.1"), + ( + "tcp6 0 0 100.100.100.101:23 10.231.200.1:63564 ESTABLISHED 0", + "tcp6 0 0 10.230.230.1:23 10.230.230.2:63564 ESTABLISHED 0", + ), + ( + "tcp6 0 0 10.0.0.1:23 10.0.0.110:63564 ESTABLISHED 0", + "tcp6 0 0 10.230.230.2:23 10.230.230.1:63564 ESTABLISHED 0", + ), + ( + "tcp6 10.0.0.11 0 10.0.0.1:23 10.0.0.111:63564 ESTABLISHED 0", + "tcp6 10.230.230.2 0 10.230.230.3:23 10.230.230.1:63564 ESTABLISHED 0", + ), + ( + "unix 2 [ ACC ] STREAM LISTENING 43279 2070/snmpd 172.31.0.1\n", + "unix 2 [ ACC ] STREAM LISTENING 43279 2070/snmpd 10.230.230.1\n", + ), + ( + "unix 2 [ ACC ] STREAM LISTENING 43279 2070/snmpd 172.31.111.11\n", + "unix 2 [ ACC ] STREAM LISTENING 43279 2070/snmpd 10.230.230.1 \n", + ), + ], +) +def test_obfuscate_ip_match_IP_overlap_netstat(line, expected): + c = InsightsConfig(obfuscate=True) + pp = Cleaner(c, {}) + actual1 = pp.clean_content(line, width=True) + assert actual1 == expected + + +@mark.parametrize( + ("original", "expected"), + [ + ( + "{\"name\":\"shadow-utils\"," + "\"epoch\":\"2\"," + "\"version\":\"4.1.5.1\"," + "\"release\":\"5.el6\"," + "\"arch\":\"x86_64\"," + "\"installtime\":\"Wed 13 Jan 2021 10:04:18 AM CET\"," + "\"buildtime\":\"1455012203\"," + "\"vendor\":\"Red Hat, Inc.\"," + "\"buildhost\":\"x86-027.build.eng.bos.redhat.com\"," + "\"sigpgp\":" + "\"RSA/8, " + "Tue 08 Mar 2016 11:15:08 AM CET, " + "Key ID 199e2f91fd431d51\"}", + "{\"name\":\"shadow-utils\"," + "\"epoch\":\"2\"," + "\"version\":\"10.230.230.1\"," + "\"release\":\"5.el6\"," + "\"arch\":\"x86_64\"," + "\"installtime\":\"Wed 13 Jan 2021 10:04:18 AM CET\"," + "\"buildtime\":\"1455012203\"," + "\"vendor\":\"Red Hat, Inc.\"," + "\"buildhost\":\"x86-027.build.eng.bos.redhat.com\"," + "\"sigpgp\":" + "\"RSA/8, " + "Tue 08 Mar 2016 11:15:08 AM CET, " + "Key ID 199e2f91fd431d51\"}", + ) + ], +) 
+@patch("insights.cleaner.ip.IPv4._ip2db", return_value="10.230.230.1") +def test_obfuscate_ip_false_positive(_ip2db, original, expected): + c = InsightsConfig(obfuscate=True) + pp = Cleaner(c, {}) + actual = pp.clean_content(original) + assert actual == expected + # "no_obfuscate=['ip'] + actual = pp.clean_content(original, no_obfuscate=['ip']) + assert actual == original diff --git a/insights/tests/cleaner/test_clean_content_keyword.py b/insights/tests/cleaner/test_clean_content_keyword.py new file mode 100644 index 0000000000..51e2a9708d --- /dev/null +++ b/insights/tests/cleaner/test_clean_content_keyword.py @@ -0,0 +1,46 @@ +from pytest import mark + +from insights.cleaner import Cleaner +from insights.cleaner.keyword import Keyword +from insights.client.config import InsightsConfig + +test_data = 'test\nabcd\n \n\n1234\npwd: p4ssw0rd\n'.splitlines() + + +@mark.parametrize("obfuscate", [True, False]) +def test_clean_content_keyword_empty_not_change(obfuscate): + conf = InsightsConfig(obfuscate=obfuscate) + + pp = Cleaner(conf, {}) # empty keywords + ret = pp.clean_content(test_data, []) + # content is NOT changed + assert test_data == ret + + +@mark.parametrize("obfuscate", [True, False]) +def test_clean_content_keyword_changed_keyword(obfuscate): + conf = InsightsConfig(obfuscate=obfuscate) + + pp = Cleaner(conf, {'keywords': ['test']}) + ret = pp.clean_content(test_data, []) + # content IS changed + assert test_data != ret + assert 'test' not in ret[0] + assert 'keyword0' in ret[0] + assert ret[1] == test_data[1] + + +@mark.parametrize("obfuscate", [True, False]) +def test_clean_content_keyword_no_such_keyword_to_change(obfuscate): + conf = InsightsConfig(obfuscate=obfuscate) + + pp = Cleaner(conf, {'keywords': ['t_e_s_t']}) # no such keyword + ret = pp.clean_content(test_data, []) + # content is NOT changed + assert test_data == ret + assert ret[1] == test_data[1] + + +def test_keyword_empty(): + kw = Keyword([]) # no keyword + assert kw._kw_db == dict() 
diff --git a/insights/tests/cleaner/test_clean_content_mult.py b/insights/tests/cleaner/test_clean_content_mult.py new file mode 100644 index 0000000000..9fa2bfd558 --- /dev/null +++ b/insights/tests/cleaner/test_clean_content_mult.py @@ -0,0 +1,29 @@ +from insights.cleaner import Cleaner +from insights.client.config import InsightsConfig + + +def test_obfuscate_hostname_and_ip(): + hostname = 'test1.abc.com' + line = "test1.abc.com, 10.0.0.1 test1.abc.loc, 20.1.4.7 smtp.abc.com, 10.1.2.7 lite.abc.com" + c = InsightsConfig(obfuscate=True, obfuscate_hostname=True, hostname=hostname) + pp = Cleaner(c, {}, hostname) + result = pp.clean_content(line) + assert 'example.com' in result + assert '10.230.230' in result + for item in line.split(): + assert item not in result + + +def test_clean_content_keyword_with_hostname_and_ip(): + hostname = 'test1.abc.com' + line = "test1.abc.com, 10.0.0.1, test1.abc.loc, 20.1.4.7, smtp.abc.com, what's your name?, what day is today?" + conf = InsightsConfig(obfuscate=True, obfuscate_hostname=True, hostname=hostname) + pp = Cleaner(conf, {'keywords': ['name', 'day']}, hostname) + result = pp.clean_content(line) + assert 'test1.abc.com' not in result + assert '10.0.0.1' not in result + assert '20.1.4.7' not in result + assert 'name' not in result + assert 'day' not in result + assert 'keyword0' in result + assert 'keyword1' in result diff --git a/insights/tests/cleaner/test_clean_content_password.py b/insights/tests/cleaner/test_clean_content_password.py new file mode 100644 index 0000000000..596a9c8e30 --- /dev/null +++ b/insights/tests/cleaner/test_clean_content_password.py @@ -0,0 +1,43 @@ +from pytest import mark + +from insights.cleaner import Cleaner +from insights.client.config import InsightsConfig + +test_data_sensitive = 'test \n\n\nabcd\n1234\npassword: p4ssw0rd\n'.splitlines() + + +@mark.parametrize("obfuscate", [True, False]) +def test_clean_content_password_line_changed_password(obfuscate): + conf = 
InsightsConfig(obfuscate=obfuscate) + + pp = Cleaner(conf, {}) + ret = pp.clean_content(test_data_sensitive, []) + # content IS changed + assert test_data_sensitive != ret + assert 'p4ssw0rd' not in ret[-1] + assert '********' in ret[-1] + + +@mark.parametrize("obfuscate", [True, False]) +def test_clean_content_password_disabled_by_no_obfuscate(obfuscate): + conf = InsightsConfig(obfuscate=obfuscate) + + pp = Cleaner(conf, {}) + ret = pp.clean_content(test_data_sensitive, ['password']) + # content is NOT changed + assert test_data_sensitive == ret + + +@mark.parametrize( + ("line", "expected"), + [ + ("password: p@ss_W0rd ?", "password: ******** ?"), + ("password = p@ss_W0rd ?", "password = ******** ?"), + ("password=p@ss_W0-d", "password=********"), + ], +) +def test_clean_content_password(line, expected): + c = InsightsConfig() + pp = Cleaner(c, {'patterns': {'regex': ['myserver', r'my(\w*)key']}}) + actual = pp.clean_content(line) + assert actual == expected diff --git a/insights/tests/cleaner/test_clean_content_pattern.py b/insights/tests/cleaner/test_clean_content_pattern.py new file mode 100644 index 0000000000..60b6cbb383 --- /dev/null +++ b/insights/tests/cleaner/test_clean_content_pattern.py @@ -0,0 +1,132 @@ +from pytest import mark + +from insights.cleaner import Cleaner +from insights.client.config import InsightsConfig + +test_data = 'test\nabcd\n \n\n1234\npwd: p4ssw0rd\n'.splitlines() + + +@mark.parametrize("obfuscate", [True, False]) +def test_clean_content_patterns_exclude_regex(obfuscate): + conf = InsightsConfig(obfuscate=obfuscate) + + rm_conf = {'patterns': {'regex': ['12.*4', '^abcd']}} + pp = Cleaner(conf, rm_conf) + ret = pp.clean_content(test_data, []) + # content IS changed + assert test_data != ret + assert '1234' not in ret + assert 'abcd' not in ret + + +@mark.parametrize("obfuscate", [True, False]) +def test_clean_content_result_empty(obfuscate): + conf = InsightsConfig(obfuscate=obfuscate) + + rm_conf = {'patterns': {'regex': [' ', 
'12.*4', '^abcd', 'test', 'pwd', 'w0rd']}} + pp = Cleaner(conf, rm_conf) + ret = pp.clean_content(test_data, []) + # result content is Empty + assert len(ret) == 0 + + +@mark.parametrize("obfuscate", [True, False]) +def test_clean_content_patterns_exclude_no_regex(obfuscate): + conf = InsightsConfig(obfuscate=obfuscate) + + rm_conf = {'patterns': ['1234', 'abcde']} + pp = Cleaner(conf, rm_conf) + ret = pp.clean_content(test_data, []) + # content IS changed + assert test_data != ret + assert '1234' not in ret + assert 'abcd' in ret + + +@mark.parametrize("obfuscate", [True, False]) +def test_clean_content_patterns_exclude_empty(obfuscate): + conf = InsightsConfig(obfuscate=obfuscate) + + rm_conf = {'patterns': []} + pp = Cleaner(conf, rm_conf) + ret = pp.clean_content(test_data, []) + # content is NOT changed + assert test_data == ret + + +@mark.parametrize("obfuscate", [True, False]) +def test_clean_content_exclude_none(obfuscate): + conf = InsightsConfig(obfuscate=obfuscate) + + pp = Cleaner(conf, None) + ret = pp.clean_content(test_data, []) + # content is NOT changed + assert test_data == ret + + +@mark.parametrize( + ("line", "expected"), + [ + ( + "what's your name? what day is today?", + "what's your keyword0? what keyword1 is tokeyword1?", + ), + ], +) +@mark.parametrize("obfuscate", [True, False]) +def test_clean_content_keyword_extract_cases(obfuscate, line, expected): + conf = InsightsConfig(obfuscate=obfuscate) + pp = Cleaner(conf, {'keywords': ['name', 'day']}) + actual = pp.clean_content(line) + assert actual == expected + + +@mark.parametrize( + ("line", "expected"), + [ + ("test1.abc.com: it's myserver? what is yours?", None), + ("testabc: it's mykey? what is yours?", None), + ( + "testabc: it's my1key? what is yours?", + "testabc: it's my1key? 
what is yours?", + ), + ], +) +def test_clean_content_exclude_patterns(line, expected): + c = InsightsConfig() + pp = Cleaner(c, {'patterns': ['myserver', 'mykey']}) + actual = pp.clean_content(line) + assert actual == expected + + +@mark.parametrize( + ("line", "expected"), + [ + ("test.abc.com: it's myserver? what is yours?", None), + ("testabc: it's mykey? what is yours?", None), + ("testabc: it's my1key? what is yours?", None), + ("test1: it's my-key? what is yours?", "test1: it's my-key? what is yours?"), + ], +) +def test_clean_content_patterns_regex(line, expected): + c = InsightsConfig() + pp = Cleaner(c, {'patterns': {'regex': ['myserver', r'my(\w*)key']}}) + actual = pp.clean_content(line) + assert actual == expected + + +@mark.parametrize( + ("line", "expected"), + [ + ("test.abc.com: it's myserver? what is yours?", None), + ("testabc: it's mykey? what is yours?", None), + ("testabc: it's my1key? what is yours?", None), + ("test1: it's my-key? what is yours?", None), + ("test: it's my-key? what is yours?", "test: it's my-key? 
what is yours?"), + ], +) +def test_clean_content_patterns_posix_regex(line, expected): + c = InsightsConfig() + pp = Cleaner(c, {'patterns': {'regex': ['myserver', r'my(\w*)key', 'test[[:digit:]]']}}) + actual = pp.clean_content(line) + assert actual == expected diff --git a/insights/tests/cleaner/test_clean_file.py b/insights/tests/cleaner/test_clean_file.py new file mode 100644 index 0000000000..ef8cb3f80c --- /dev/null +++ b/insights/tests/cleaner/test_clean_file.py @@ -0,0 +1,79 @@ +# -*- coding: utf-8 -*- +import os + +from mock.mock import patch + +from insights.client.archive import InsightsArchive +from insights.client.config import InsightsConfig +from insights.cleaner import Cleaner + + +def test_clean_file_obfuscate(): + conf = InsightsConfig(obfuscate=True) + arch = InsightsArchive(conf) + arch.create_archive_dir() + + # netstat_-neopa + line = "tcp6 0 0 10.0.0.1:23 10.0.0.110:63564 ESTABLISHED 0" + ret = "tcp6 0 0 10.230.230.2:23 10.230.230.1:63564 ESTABLISHED 0" + + test_dir = os.path.join(arch.archive_dir, 'data', 'etc') + os.makedirs(test_dir) + pp = Cleaner(conf, {}) + + # netstat + test_file = os.path.join(arch.archive_dir, 'data', 'testfile.netstat_-neopa') + with open(test_file, 'w') as t: + t.write(line) + pp.clean_file(test_file, no_obfuscate=[]) + # file is changed per netstat logic + with open(test_file, 'r') as t: + assert ret == ''.join(t.readlines()) + + arch.delete_archive_dir() + + +def test_clean_file_obfuscate_disabled_by_no_obfuscate(): + conf = InsightsConfig(obfuscate=True) + arch = InsightsArchive(conf) + arch.create_archive_dir() + + # netstat_-neopa + line = "tcp6 0 0 10.0.0.1:23 10.0.0.110:63564 ESTABLISHED 0" + + test_dir = os.path.join(arch.archive_dir, 'data', 'etc') + os.makedirs(test_dir) + pp = Cleaner(conf, {}) + + # netstat + test_file = os.path.join(arch.archive_dir, 'data', 'testfile.netstat_-neopa') + with open(test_file, 'w') as t: + t.write(line) + pp.clean_file(test_file, no_obfuscate=['ip']) + # file is NOT 
changed + with open(test_file, 'r') as t: + assert line == ''.join(t.readlines()) + + arch.delete_archive_dir() + + +@patch("insights.cleaner.Cleaner.clean_content") +def test_clean_file_non_exist(func): + conf = InsightsConfig(obfuscate=True) + arch = InsightsArchive(conf) + arch.create_archive_dir() + + test_dir = os.path.join(arch.archive_dir, 'data', 'etc') + os.makedirs(test_dir) + pp = Cleaner(conf, {}) + + pp.clean_file('non_existing_file', no_obfuscate=[]) + func.assert_not_called() + + # empty file + test_file = os.path.join(arch.archive_dir, 'data', 'etc', 'x.conf') + open(test_file, 'w').close() + pp.clean_file(test_file, no_obfuscate=[]) + func.assert_called_once() + + arch.delete_archive_dir() diff --git a/insights/tests/cleaner/test_clean_file_keyword.py b/insights/tests/cleaner/test_clean_file_keyword.py new file mode 100644 index 0000000000..b872c9c62d --- /dev/null +++ b/insights/tests/cleaner/test_clean_file_keyword.py @@ -0,0 +1,87 @@ +import os + +from pytest import mark + +from insights.client.archive import InsightsArchive +from insights.client.config import InsightsConfig +from insights.cleaner import Cleaner + +test_file_data = 'test\nabcd\n1234\npwd: p4ssw0rd\n' + + +@mark.parametrize("obfuscate", [True, False]) +def test_clean_file_keyword_empty_not_change(obfuscate): + conf = InsightsConfig(obfuscate=obfuscate) + arch = InsightsArchive(conf) + arch.create_archive_dir() + + # put something in the archive to redact + test_file = os.path.join(arch.archive_dir, 'test.file') + with open(test_file, 'w') as t: + t.write(test_file_data) + + pp = Cleaner(conf, {}) # empty keywords + pp.clean_file(test_file, []) + # file is NOT changed + with open(test_file, 'r') as t: + assert test_file_data == ''.join(t.readlines()) + arch.delete_archive_dir() + + +@mark.parametrize("obfuscate", [True, False]) +def test_clean_file_keyword_changed_keyword(obfuscate): + conf = InsightsConfig(obfuscate=obfuscate) + arch = InsightsArchive(conf) + 
arch.create_archive_dir() + + # put something in the archive to redact + test_file = os.path.join(arch.archive_dir, 'test.file') + with open(test_file, 'w') as t: + t.write(test_file_data) + + pp = Cleaner(conf, {'keywords': ['test']}) + pp.clean_file(test_file, []) + # file is changed + with open(test_file, 'r') as t: + data = t.readlines() + assert 'test' not in data[0] + assert 'keyword0' in data[0] + arch.delete_archive_dir() + + +@mark.parametrize("obfuscate", [True, False]) +def test_clean_file_keyword_no_such_keyword_to_change(obfuscate): + conf = InsightsConfig(obfuscate=obfuscate) + arch = InsightsArchive(conf) + arch.create_archive_dir() + + # put something in the archive to redact + test_file = os.path.join(arch.archive_dir, 'test.file') + with open(test_file, 'w') as t: + t.write(test_file_data) + + pp = Cleaner(conf, {'keywords': ['t_e_s_t']}) # no such keyword + pp.clean_file(test_file, []) + # file is NOT changed + with open(test_file, 'r') as t: + assert test_file_data == ''.join(t.readlines()) + arch.delete_archive_dir() + + +@mark.parametrize("obfuscate", [True, False]) +def test_clean_file_keyword_disabled_by_no_obfuacate(obfuscate): + conf = InsightsConfig(obfuscate=obfuscate) + arch = InsightsArchive(conf) + arch.create_archive_dir() + + # put something in the archive to redact + test_file = os.path.join(arch.archive_dir, 'test.file') + with open(test_file, 'w') as t: + t.write(test_file_data) + + pp = Cleaner(conf, {'keywords': 'test'}) + pp.clean_file(test_file, ['keyword']) + # file is NOT changed + with open(test_file, 'r') as t: + assert test_file_data == ''.join(t.readlines()) + arch.delete_archive_dir() diff --git a/insights/tests/cleaner/test_clean_file_password.py b/insights/tests/cleaner/test_clean_file_password.py new file mode 100644 index 0000000000..1ffc1eee0a --- /dev/null +++ b/insights/tests/cleaner/test_clean_file_password.py @@ -0,0 +1,41 @@ +import os + +from pytest import mark + +from insights.client.archive import 
InsightsArchive +from insights.client.config import InsightsConfig +from insights.cleaner import Cleaner + +test_file_data_sensitive = ( + 'test\nabcd\n1234\npassword: p4ssw0rd here\npassword= p4ssw0rd here\npassword' +) + + +@mark.parametrize("obfuscate", [True, False]) +def test_clean_file_line_changed_password(obfuscate): + conf = InsightsConfig(obfuscate=obfuscate) + arch = InsightsArchive(conf) + arch.create_archive_dir() + + # put something in the archive to redact + test_file = os.path.join(arch.archive_dir, 'test.file') + with open(test_file, 'w') as t: + t.write(test_file_data_sensitive) + old_data = test_file_data_sensitive.splitlines() + + pp = Cleaner(conf, {}) + pp.clean_file(test_file, []) + # file is changed + pwd_line_cnt = 0 + with open(test_file, 'r') as t: + new_data = t.readlines() + for idx, line in enumerate(old_data): + if 'p4ssw0rd' in line: + pwd_line_cnt += 1 + assert 'p4ssw0rd' not in new_data[idx] + assert '********' in new_data[idx] + if line.endswith('password'): + pwd_line_cnt += 1 + assert line == new_data[idx] + assert pwd_line_cnt == 3 + arch.delete_archive_dir() diff --git a/insights/tests/cleaner/test_clean_file_pattern.py b/insights/tests/cleaner/test_clean_file_pattern.py new file mode 100644 index 0000000000..7fc369def3 --- /dev/null +++ b/insights/tests/cleaner/test_clean_file_pattern.py @@ -0,0 +1,118 @@ +import os + +from pytest import mark + +from insights.client.archive import InsightsArchive +from insights.client.config import InsightsConfig +from insights.cleaner import Cleaner + +test_file_data = 'test\nabcd\n1234\npwd: p4ssw0rd\n' + + +@mark.parametrize( + ("line", "expected"), + [ + ( + "what's your name? what day is today?", + "what's your keyword0? 
what keyword1 is tokeyword1?", + ), + ], +) +@mark.parametrize("obfuscate", [True, False]) +def test_clean_file_patterns_exclude_regex(obfuscate, line, expected): + conf = InsightsConfig(obfuscate=obfuscate) + arch = InsightsArchive(conf) + arch.create_archive_dir() + + # put something in the archive to redact + test_file = os.path.join(arch.archive_dir, 'test.file') + with open(test_file, 'w') as t: + t.write(test_file_data) + + rm_conf = {'patterns': {'regex': ['12.*4', '^abcd']}} + pp = Cleaner(conf, rm_conf) + pp.clean_file(test_file, []) + with open(test_file, 'r') as t: + data = [i.strip() for i in t.readlines()] + assert '1234' not in data + assert 'abcd' not in data + arch.delete_archive_dir() + + +@mark.parametrize("obfuscate", [True, False]) +def test_clean_file_patterns_empty_result(obfuscate): + conf = InsightsConfig(obfuscate=obfuscate) + arch = InsightsArchive(conf) + arch.create_archive_dir() + + # put something in the archive to redact + test_file = os.path.join(arch.archive_dir, 'test.file') + with open(test_file, 'w') as t: + t.write(test_file_data) + + rm_conf = {'patterns': {'regex': ['test', 'pwd', '12.*4', '^abcd']}} + pp = Cleaner(conf, rm_conf) + pp.clean_file(test_file) + # file is cleaned to empty, hence it was removed + assert not os.path.exists(test_file) + arch.delete_archive_dir() + + +@mark.parametrize("obfuscate", [True, False]) +def test_clean_file_patterns_exclude_no_regex(obfuscate): + conf = InsightsConfig(obfuscate=obfuscate) + arch = InsightsArchive(conf) + arch.create_archive_dir() + + # put something in the archive to redact + test_file = os.path.join(arch.archive_dir, 'test.file') + with open(test_file, 'w') as t: + t.write(test_file_data) + + rm_conf = {'patterns': ['1234', 'abcd']} + pp = Cleaner(conf, rm_conf) + pp.clean_file(test_file, []) + with open(test_file, 'r') as t: + data = [i.strip() for i in t.readlines()] + assert '1234' not in data + assert 'abcd' not in data + arch.delete_archive_dir() + + 
+@mark.parametrize("obfuscate", [True, False]) +def test_clean_file_patterns_exclude_empty(obfuscate): + conf = InsightsConfig(obfuscate=obfuscate) + arch = InsightsArchive(conf) + arch.create_archive_dir() + + # put something in the archive to redact + test_file = os.path.join(arch.archive_dir, 'test.file') + with open(test_file, 'w') as t: + t.write(test_file_data) + + rm_conf = {'patterns': []} + pp = Cleaner(conf, rm_conf) + pp.clean_file(test_file, []) + # file is not changed + with open(test_file, 'r') as t: + assert test_file_data == ''.join(t.readlines()) + arch.delete_archive_dir() + + +@mark.parametrize("obfuscate", [True, False]) +def test_clean_file_patterns_exclude_none(obfuscate): + conf = InsightsConfig(obfuscate=obfuscate) + arch = InsightsArchive(conf) + arch.create_archive_dir() + + # put something in the archive to redact + test_file = os.path.join(arch.archive_dir, 'test.file') + with open(test_file, 'w') as t: + t.write(test_file_data) + + pp = Cleaner(conf, None) + pp.clean_file(test_file, []) + # file is not changed + with open(test_file, 'r') as t: + assert test_file_data == ''.join(t.readlines()) + arch.delete_archive_dir() diff --git a/insights/tests/core/spec_cleaner/test_reports.py b/insights/tests/cleaner/test_reports.py similarity index 77% rename from insights/tests/core/spec_cleaner/test_reports.py rename to insights/tests/cleaner/test_reports.py index ac54c17663..96b19d8488 100644 --- a/insights/tests/core/spec_cleaner/test_reports.py +++ b/insights/tests/cleaner/test_reports.py @@ -7,23 +7,25 @@ from insights.client.archive import InsightsArchive from insights.client.config import InsightsConfig -from insights.core.spec_cleaner import Cleaner +from insights.cleaner import Cleaner +from insights.cleaner.utilities import write_report hostname = "report.test.com" test_file_data = 'ip: 10.0.2.155\ntestword\n{0}'.format(hostname) @mark.parametrize( - ("obfuscate", "obfuscate_hostname"), + ("obfuscate", "obfuscate_hostname", "keywords"), 
[ - (False, False), (True, False), (True, True), - ] + (False, False, []), + (True, False, ['testword']), + (True, True, ['testword']), + ], ) @mark.parametrize("test_umask", [0o000, 0o022]) -def test_rhsm_facts(test_umask, obfuscate, obfuscate_hostname): +def test_rhsm_facts(test_umask, obfuscate, obfuscate_hostname, keywords): rhsm_facts_file = '/tmp/insights_test_rhsm.facts' - conf = InsightsConfig(obfuscate=obfuscate, - obfuscate_hostname=obfuscate_hostname) + conf = InsightsConfig(obfuscate=obfuscate, obfuscate_hostname=obfuscate_hostname) conf.rhsm_facts_file = rhsm_facts_file arch = InsightsArchive(conf) arch.create_archive_dir() @@ -34,7 +36,7 @@ def test_rhsm_facts(test_umask, obfuscate, obfuscate_hostname): t.write(test_file_data) old_umask = os.umask(test_umask) - pp = Cleaner(conf, {'keywords': ['testword']}, hostname) + pp = Cleaner(conf, {'keywords': keywords}, hostname) pp.clean_file(test_file, []) pp.generate_report(arch.archive_name) arch.delete_archive_dir() @@ -50,24 +52,27 @@ def test_rhsm_facts(test_umask, obfuscate, obfuscate_hostname): # hostname assert facts['insights_client.hostname'] == hostname assert facts['insights_client.obfuscate_hostname_enabled'] == obfuscate_hostname - hns = json.loads(facts['insights_client.hostnames']) + hns = json.loads(facts['insights_client.obfuscated_hostname']) if obfuscate_hostname: assert hns[0]['original'] == hostname assert '.example.com' in hns[0]['obfuscated'] else: - hns == [] + assert hns == [] # ip assert facts['insights_client.obfuscate_ip_enabled'] == obfuscate - ips = json.loads(facts['insights_client.ips']) + ips = json.loads(facts['insights_client.obfuscated_ipv4']) if obfuscate: assert ips[0]['original'] == '10.0.2.155' assert ips[0]['obfuscated'] == '10.230.230.1' else: assert ips == [] # keyword - kws = json.loads(facts['insights_client.keywords']) - assert kws[0]['original'] == 'testword' - assert kws[0]['obfuscated'] == 'keyword0' + kws = 
json.loads(facts['insights_client.obfuscated_keyword']) + if keywords: + assert kws[0]['original'] == 'testword' + assert kws[0]['obfuscated'] == 'keyword0' + else: + assert kws == [] os.unlink(rhsm_facts_file) @@ -76,13 +81,14 @@ def test_rhsm_facts(test_umask, obfuscate, obfuscate_hostname): @mark.parametrize( ("obfuscate", "obfuscate_hostname"), [ - (False, False), (True, False), (True, True), - ] + (False, False), + (True, False), + (True, True), + ], ) -@patch('insights.core.spec_cleaner.Cleaner.generate_rhsm_facts', return_value=None) +@patch('insights.cleaner.Cleaner.generate_rhsm_facts', return_value=None) def test_all_csv_reports(rhsm_facts, obfuscate, rm_conf, obfuscate_hostname): - conf = InsightsConfig(obfuscate=obfuscate, - obfuscate_hostname=obfuscate_hostname) + conf = InsightsConfig(obfuscate=obfuscate, obfuscate_hostname=obfuscate_hostname) arch = InsightsArchive(conf) arch.create_archive_dir() @@ -141,3 +147,9 @@ def test_all_csv_reports(rhsm_facts, obfuscate, rm_conf, obfuscate_hostname): os.unlink(kw_report_file) else: assert not os.path.isfile(kw_report_file) + + +def test_wirte_report_exp(): + report_file = '/tmp/_test.csv' + write_report(None, report_file) + os.unlink(report_file) diff --git a/insights/tests/core/spec_cleaner/test_clean_content.py b/insights/tests/core/spec_cleaner/test_clean_content.py deleted file mode 100644 index 6c36d2ba1b..0000000000 --- a/insights/tests/core/spec_cleaner/test_clean_content.py +++ /dev/null @@ -1,120 +0,0 @@ -from pytest import mark - -from insights.client.config import InsightsConfig -from insights.core.spec_cleaner import Cleaner - -test_data = 'test\nabcd\n \n\n1234\npwd: p4ssw0rd\n'.splitlines() -test_data_sensitive = 'test \n\n\nabcd\n1234\npassword: p4ssw0rd\n'.splitlines() - - -@mark.parametrize("obfuscate", [True, False]) -def test_redact_line_changed_password(obfuscate): - conf = InsightsConfig(obfuscate=obfuscate) - - pp = Cleaner(conf, {}) - ret = pp.clean_content(test_data_sensitive, []) - # 
content IS changed - assert test_data != ret - assert 'p4ssw0rd' not in ret[-1] - assert '********' in ret[-1] - - -@mark.parametrize("obfuscate", [True, False]) -def test_redact_keyword_empty_not_change(obfuscate): - conf = InsightsConfig(obfuscate=obfuscate) - - pp = Cleaner(conf, {}) # empty keywords - ret = pp.clean_content(test_data, []) - # content is NOT changed - assert test_data == ret - - -@mark.parametrize("obfuscate", [True, False]) -def test_redact_keyword_changed_keyword(obfuscate): - conf = InsightsConfig(obfuscate=obfuscate) - - pp = Cleaner(conf, {'keywords': ['test']}) - ret = pp.clean_content(test_data, []) - # content IS changed - assert test_data != ret - assert 'test' not in ret[0] - assert 'keyword0' in ret[0] - assert ret[1] == test_data[1] - - -@mark.parametrize("obfuscate", [True, False]) -def test_redact_keyword_no_such_keyword_to_change(obfuscate): - conf = InsightsConfig(obfuscate=obfuscate) - - pp = Cleaner(conf, {'keywords': ['t_e_s_t']}) # no such keyword - ret = pp.clean_content(test_data, []) - # content is NOT changed - assert test_data == ret - - -@mark.parametrize("obfuscate", [True, False]) -def test_redact_keyword_disabled_by_no_redact(obfuscate): - conf = InsightsConfig(obfuscate=obfuscate) - - pp = Cleaner(conf, {'keywords': 'test'}) - ret = pp.clean_content(test_data, [], no_redact=True) - # content is NOT changed - assert test_data == ret - - -@mark.parametrize("obfuscate", [True, False]) -def test_redact_patterns_exclude_regex(obfuscate): - conf = InsightsConfig(obfuscate=obfuscate) - - rm_conf = {'patterns': {'regex': ['12.*4', '^abcd']}} - pp = Cleaner(conf, rm_conf) - ret = pp.clean_content(test_data, []) - # content IS changed - assert test_data != ret - assert '1234' not in ret - assert 'abcd' not in ret - - -@mark.parametrize("obfuscate", [True, False]) -def test_redact_result_empty(obfuscate): - conf = InsightsConfig(obfuscate=obfuscate) - - rm_conf = {'patterns': {'regex': [' ', '12.*4', '^abcd', 'test', 'pwd', 
'w0rd']}} - pp = Cleaner(conf, rm_conf) - ret = pp.clean_content(test_data, []) - # result content is Empty - assert len(ret) == 0 - - -@mark.parametrize("obfuscate", [True, False]) -def test_redact_patterns_exclude_no_regex(obfuscate): - conf = InsightsConfig(obfuscate=obfuscate) - - rm_conf = {'patterns': ['1234', 'abcde']} - pp = Cleaner(conf, rm_conf) - ret = pp.clean_content(test_data, []) - # content IS changed - assert test_data != ret - assert '1234' not in ret - assert 'abcd' in ret - - -@mark.parametrize("obfuscate", [True, False]) -def test_redact_patterns_exclude_empty(obfuscate): - conf = InsightsConfig(obfuscate=obfuscate) - - rm_conf = {'patterns': []} - pp = Cleaner(conf, rm_conf) - ret = pp.clean_content(test_data, []) - # file is NOT changed - assert test_data == ret - - -@mark.parametrize("obfuscate", [True, False]) -def test_redact_exclude_none(obfuscate): - conf = InsightsConfig(obfuscate=obfuscate) - - pp = Cleaner(conf, None) - ret = pp.clean_content(test_data, []) - # file is NOT changed - assert test_data == ret diff --git a/insights/tests/core/spec_cleaner/test_clean_file_obfuscate.py b/insights/tests/core/spec_cleaner/test_clean_file_obfuscate.py deleted file mode 100644 index 0c0fed59b9..0000000000 --- a/insights/tests/core/spec_cleaner/test_clean_file_obfuscate.py +++ /dev/null @@ -1,276 +0,0 @@ -# -*- coding: utf-8 -*- -import os - -from mock.mock import patch -from pytest import mark - -from insights.client.archive import InsightsArchive -from insights.client.config import InsightsConfig -from insights.core.spec_cleaner import Cleaner - - -@mark.parametrize(("line", "expected"), [ - ("test_no_ip", "test_no_ip"), - ("test 127.0.0.1", "test 127.0.0.1"), - ("radius_ip_1=10.0.0.1", "radius_ip_1=10.230.230.1"), - ( - ( - " inet 10.0.2.15" - " netmask 255.255.255.0" - " broadcast 10.0.2.255" - ), - ( - " inet 10.230.230.3" - " netmask 10.230.230.1" - " broadcast 10.230.230.2" - ), - ), - ( - "radius_ip_1=10.0.0.100-10.0.0.200", - 
"radius_ip_1=10.230.230.1-10.230.230.2", - ), -]) -def test_obfuscate_ip_match(line, expected): - c = InsightsConfig(obfuscate=True) - pp = Cleaner(c, {}) - actual = pp._obfuscate_line(line, [pp._sub_ip]) - assert actual == expected - - -@mark.parametrize(("line", "expected"), [ - ( - ( - " inet 10.0.2.155" - " netmask 10.0.2.1" - " broadcast 10.0.2.15" - ), - ( - " inet 10.230.230.1" - " netmask 10.230.230.3" - " broadcast 10.230.230.2" - ), - ), -]) -def test_obfuscate_ip_match_IP_overlap(line, expected): - c = InsightsConfig(obfuscate=True) - pp = Cleaner(c, {}) - actual = pp._obfuscate_line(line, [pp._sub_ip]) - assert actual == expected - - -@mark.parametrize(("line", "expected"), [ - ("test_no_ip", "test_no_ip"), - ("test 127.0.0.1", "test 127.0.0.1"), - ( - "tcp6 0 0 10.0.0.1:23 10.0.0.110:63564 ESTABLISHED 0", - "tcp6 0 0 10.230.230.2:23 10.230.230.1:63564 ESTABLISHED 0" - ), - ( - "tcp6 10.0.0.11 0 10.0.0.1:23 10.0.0.111:63564 ESTABLISHED 0", - "tcp6 10.230.230.2 0 10.230.230.3:23 10.230.230.1:63564 ESTABLISHED 0" - ), - ( - "unix 2 [ ACC ] STREAM LISTENING 43279 2070/snmpd 172.31.0.1\n", - "unix 2 [ ACC ] STREAM LISTENING 43279 2070/snmpd 10.230.230.1\n" - ), - ( - "unix 2 [ ACC ] STREAM LISTENING 43279 2070/snmpd 172.31.111.11\n", - "unix 2 [ ACC ] STREAM LISTENING 43279 2070/snmpd 10.230.230.1 \n" - ), -]) -def test_obfuscate_ip_match_IP_overlap_netstat(line, expected): - c = InsightsConfig(obfuscate=True) - pp = Cleaner(c, {}) - actual1 = pp._obfuscate_line(line, [pp._sub_ip_netstat]) - actual2 = pp._obfuscate_line(line, [pp._sub_ip_netstat]) - assert actual1 == expected - assert actual2 == expected - - -@mark.parametrize(("original", "expected"), [ - ( - "{\"name\":\"shadow-utils\"," - "\"epoch\":\"2\"," - "\"version\":\"4.1.5.1\"," - "\"release\":\"5.el6\"," - "\"arch\":\"x86_64\"," - "\"installtime\":\"Wed 13 Jan 2021 10:04:18 AM CET\"," - "\"buildtime\":\"1455012203\"," - "\"vendor\":\"Red Hat, Inc.\"," - 
"\"buildhost\":\"x86-027.build.eng.bos.redhat.com\"," - "\"sigpgp\":" - "\"RSA/8, " - "Tue 08 Mar 2016 11:15:08 AM CET, " - "Key ID 199e2f91fd431d51\"}", - - "{\"name\":\"shadow-utils\"," - "\"epoch\":\"2\"," - "\"version\":\"10.230.230.1\"," - "\"release\":\"5.el6\"," - "\"arch\":\"x86_64\"," - "\"installtime\":\"Wed 13 Jan 2021 10:04:18 AM CET\"," - "\"buildtime\":\"1455012203\"," - "\"vendor\":\"Red Hat, Inc.\"," - "\"buildhost\":\"x86-027.build.eng.bos.redhat.com\"," - "\"sigpgp\":" - "\"RSA/8, " - "Tue 08 Mar 2016 11:15:08 AM CET, " - "Key ID 199e2f91fd431d51\"}", - ) -]) -@patch("insights.core.spec_cleaner.Cleaner._ip2db", return_value="10.230.230.1") -def test_obfuscate_ip_false_positive(_ip2db, original, expected): - c = InsightsConfig(obfuscate=True) - pp = Cleaner(c, {}) - actual = pp._obfuscate_line(original, [pp._sub_ip]) - assert actual == expected - # BUT works well without "obfuscate=['ip'] - actual = pp._obfuscate_line(original, []) - assert actual == original - - -def test_obfuscate_hostname(): - hostname = 'test1.abc.com' - line = "a line with %s here, test2.abc.com, test.redhat.com" % hostname - c = InsightsConfig(obfuscate=True, obfuscate_hostname=True, hostname=hostname) - pp = Cleaner(c, {}, hostname) - actual = pp._obfuscate_line(line, [pp._sub_hostname]) - assert 'test1' not in actual - assert 'test2' not in actual - assert 'abc.com' not in actual - assert len(actual.split('.')[0].split()[-1]) == 12 - assert '.example.com' in actual - - line = "a line w/o hostname, but test2.abc.com only" - actual = pp._obfuscate_line(line, [pp._sub_hostname]) - assert 'test2' not in actual - assert 'abc.com' not in actual - assert '.example.com' in actual - assert len(actual.split('.')[0].split()[-1]) != 12 - - hostname = 'test1' # Short hostname - line = "a line with %s here, test2.def.com" % hostname - pp = Cleaner(c, {}, hostname) - actual = pp._obfuscate_line(line, [pp._sub_hostname]) - assert hostname not in actual - assert 'test2.def.com' in actual - 
- line = "a line w/o hostname" - hostname = 'test1.abc.com' - pp = Cleaner(c, {}, hostname) - actual = pp._obfuscate_line(line, [pp._sub_hostname]) - assert line == actual - - -@patch("insights.core.spec_cleaner.determine_hostname", return_value='test1.abc.com') -def test_obfuscate_hostname_determine_hostanme(hn): - hostname = 'test1.abc.com' - c = InsightsConfig(obfuscate=True, obfuscate_hostname=True, display_name='disp.abc.com') - line = "a line with %s here, test2.def.com" % hostname - pp = Cleaner(c, {}) # passed empty hostname to cleaner, determain it - actual = pp._obfuscate_line(line, [pp._sub_hostname]) - assert hostname not in actual - assert len(actual.split('.')[0].split()[-1]) == 12 - assert 'test2.def.com' in actual - - -@patch("insights.core.spec_cleaner.determine_hostname", return_value='dt_test.abc.com') -def test_cleaner_fqdn(de_hn): - fqdn = 'test.abc.com' - c = InsightsConfig(obfuscate=True, obfuscate_hostname=True, display_name='disp.abc.com') - pp = Cleaner(c, {}, fqdn) # pass fqdn to cleaner - assert pp.fqdn == fqdn - assert len(pp.obfuscated_fqdn.split('.')[0]) == 12 - - fqdn1 = 'test.def.com' - pp = Cleaner(c, {}, fqdn1) # pass fqdn1 to cleaner - assert pp.fqdn == fqdn1 - assert len(pp.obfuscated_fqdn.split('.')[0]) == 12 - - pp = Cleaner(c, {}) # pass None "hostname" to cleaner - assert pp.fqdn == "dt_test.abc.com" # get hostname from determine_hostname, but not display_name - assert len(pp.obfuscated_fqdn.split('.')[0]) == 12 - - -def test_obfuscate_hostname_and_ip(): - hostname = 'test1.abc.com' - line = "test1.abc.com, 10.0.0.1 test1.abc.loc, 20.1.4.7 smtp.abc.com, 10.1.2.7 lite.abc.com" - c = InsightsConfig(obfuscate=True, obfuscate_hostname=True, hostname=hostname) - pp = Cleaner(c, {}, hostname) - result = pp._obfuscate_line(line, [pp._sub_ip, pp._sub_hostname]) - assert 'example.com' in result - assert '10.230.230' in result - for item in line.split(): - assert item not in result - - -def test_clean_file_obfuscate(): - conf = 
InsightsConfig(obfuscate=True) - arch = InsightsArchive(conf) - arch.create_archive_dir() - - # netstat_-neopa - line = "tcp6 0 0 10.0.0.1:23 10.0.0.110:63564 ESTABLISHED 0" - ret = "tcp6 0 0 10.230.230.2:23 10.230.230.1:63564 ESTABLISHED 0" - - test_dir = os.path.join(arch.archive_dir, 'data', 'etc') - os.makedirs(test_dir) - pp = Cleaner(conf, {}) - - # netstat - test_file = os.path.join(arch.archive_dir, 'data', 'testfile.netstat_-neopa') - with open(test_file, 'w') as t: - t.write(line) - pp.clean_file(test_file, no_obfuscate=[]) - # file is changed per netstat logic - with open(test_file, 'r') as t: - assert ret == ''.join(t.readlines()) - - arch.delete_archive_dir() - - -def test_clean_file_obfuscate_disabled_by_no_obfuscate(): - conf = InsightsConfig(obfuscate=True) - arch = InsightsArchive(conf) - arch.create_archive_dir() - - # netstat_-neopa - line = "tcp6 0 0 10.0.0.1:23 10.0.0.110:63564 ESTABLISHED 0" - - test_dir = os.path.join(arch.archive_dir, 'data', 'etc') - os.makedirs(test_dir) - pp = Cleaner(conf, {}) - - # netstat - test_file = os.path.join(arch.archive_dir, 'data', 'testfile.netstat_-neopa') - with open(test_file, 'w') as t: - t.write(line) - pp.clean_file(test_file, no_obfuscate=['ip']) - # file is NOT changed - with open(test_file, 'r') as t: - assert line == ''.join(t.readlines()) - - arch.delete_archive_dir() - - -@patch("insights.core.spec_cleaner.Cleaner._redact_line") -def test_clean_file_non_exist(redact_func): - conf = InsightsConfig(obfuscate=True) - arch = InsightsArchive(conf) - arch.create_archive_dir() - - test_dir = os.path.join(arch.archive_dir, 'data', 'etc') - os.makedirs(test_dir) - pp = Cleaner(conf, {}) - - pp.clean_file('non_existing_file', no_obfuscate=[]) - redact_func.assert_not_called() - - # empty file - test_file = os.path.join(arch.archive_dir, 'data', 'etc', 'x.conf') - with open(test_file, 'w'): - pass - pp.clean_file(test_file, no_obfuscate=[]) - redact_func.assert_not_called() - - arch.delete_archive_dir() diff 
--git a/insights/tests/core/spec_cleaner/test_clean_file_redact.py b/insights/tests/core/spec_cleaner/test_clean_file_redact.py deleted file mode 100644 index bb4b9df5fc..0000000000 --- a/insights/tests/core/spec_cleaner/test_clean_file_redact.py +++ /dev/null @@ -1,317 +0,0 @@ -import os - -from mock.mock import patch, Mock -from pytest import mark - -from insights.client.archive import InsightsArchive -from insights.client.config import InsightsConfig -from insights.core.spec_cleaner import Cleaner - -test_file_data = 'test\nabcd\n1234\npwd: p4ssw0rd\n' -test_file_data_sensitive = 'test\nabcd\n1234\npassword: p4ssw0rd here\npassword= p4ssw0rd here\npassword' - - -@patch('insights.client.archive.InsightsArchive', Mock()) -@patch('insights.client.core_collector.CoreCollector._write_branch_info', Mock()) -@patch('insights.client.core_collector.CoreCollector._write_display_name', Mock()) -@patch('insights.client.core_collector.CoreCollector._write_version_info', Mock()) -@patch('insights.client.core_collector.CoreCollector._write_tags', Mock()) -@patch('insights.client.core_collector.CoreCollector._write_blacklist_report', Mock()) -@patch('insights.client.core_collector.collect.collect', Mock(return_value=('/var/tmp/testarchive/insights-test', {}))) -def test_redact_core(): - conf = InsightsConfig() - rm_conf = {'test': 'test'} - pp = Cleaner(conf, rm_conf) - assert pp.redact['exclude'] == [] - assert pp.redact['regex'] is False - - -@mark.parametrize("obfuscate", [True, False]) -def test_redact_line_changed_password(obfuscate): - conf = InsightsConfig(obfuscate=obfuscate) - arch = InsightsArchive(conf) - arch.create_archive_dir() - - # put something in the archive to redact - test_file = os.path.join(arch.archive_dir, 'test.file') - with open(test_file, 'w') as t: - t.write(test_file_data_sensitive) - old_data = test_file_data_sensitive.splitlines() - - pp = Cleaner(conf, {}) - pp.clean_file(test_file, []) - # file is changed - pwd_line_cnt = 0 - with 
open(test_file, 'r') as t: - new_data = t.readlines() - for idx, line in enumerate(old_data): - if 'p4ssw0rd' in line: - pwd_line_cnt += 1 - assert 'p4ssw0rd' not in new_data[idx] - assert '********' in new_data[idx] - if line.endswith('password'): - pwd_line_cnt += 1 - assert line == new_data[idx] - assert pwd_line_cnt == 3 - arch.delete_archive_dir() - - -@mark.parametrize("obfuscate", [True, False]) -def test_redact_keyword_empty_not_change(obfuscate): - conf = InsightsConfig(obfuscate=obfuscate) - arch = InsightsArchive(conf) - arch.create_archive_dir() - - # put something in the archive to redact - test_file = os.path.join(arch.archive_dir, 'test.file') - with open(test_file, 'w') as t: - t.write(test_file_data) - - pp = Cleaner(conf, {}) # empty keywords - pp.clean_file(test_file, []) - # file is NOT changed - with open(test_file, 'r') as t: - assert test_file_data == ''.join(t.readlines()) - arch.delete_archive_dir() - - -@mark.parametrize("obfuscate", [True, False]) -def test_redact_keyword_changed_keyword(obfuscate): - conf = InsightsConfig(obfuscate=obfuscate) - arch = InsightsArchive(conf) - arch.create_archive_dir() - - # put something in the archive to redact - test_file = os.path.join(arch.archive_dir, 'test.file') - with open(test_file, 'w') as t: - t.write(test_file_data) - - pp = Cleaner(conf, {'keywords': ['test']}) - pp.clean_file(test_file, []) - # file is changed - with open(test_file, 'r') as t: - data = t.readlines() - assert 'test' not in data[0] - assert 'keyword0' in data[0] - arch.delete_archive_dir() - - -@mark.parametrize("obfuscate", [True, False]) -def test_redact_keyword_no_such_keyword_to_change(obfuscate): - conf = InsightsConfig(obfuscate=obfuscate) - arch = InsightsArchive(conf) - arch.create_archive_dir() - - # put something in the archive to redact - test_file = os.path.join(arch.archive_dir, 'test.file') - with open(test_file, 'w') as t: - t.write(test_file_data) - - pp = Cleaner(conf, {'keywords': ['t_e_s_t']}) # no such 
keyword - pp.clean_file(test_file, []) - # file is NOT changed - with open(test_file, 'r') as t: - assert test_file_data == ''.join(t.readlines()) - arch.delete_archive_dir() - - -@mark.parametrize("obfuscate", [True, False]) -def test_redact_keyword_disabled_by_no_redact(obfuscate): - conf = InsightsConfig(obfuscate=obfuscate) - arch = InsightsArchive(conf) - arch.create_archive_dir() - - # put something in the archive to redact - test_file = os.path.join(arch.archive_dir, 'test.file') - with open(test_file, 'w') as t: - t.write(test_file_data) - - pp = Cleaner(conf, {'keywords': 'test'}) - pp.clean_file(test_file, [], no_redact=True) - # file is NOT changed - with open(test_file, 'r') as t: - assert test_file_data == ''.join(t.readlines()) - arch.delete_archive_dir() - - -@mark.parametrize(("line", "expected"), [ - ( - "what's your name? what day is today?", - "what's your keyword0? what keyword1 is tokeyword1?" - ), -]) -@mark.parametrize("obfuscate", [True, False]) -def test_redact_line_keyword_extract_cases(obfuscate, line, expected): - conf = InsightsConfig(obfuscate=obfuscate) - pp = Cleaner(conf, {'keywords': ['name', 'day']}) - actual = pp._redact_line(line) - assert actual == expected - - -def test_redact_line_keyword_with_hostname_and_ip(): - hostname = 'test1.abc.com' - line = "test1.abc.com, 10.0.0.1, test1.abc.loc, 20.1.4.7, smtp.abc.com, what's your name?, what day is today?" 
- conf = InsightsConfig(obfuscate=True, obfuscate_hostname=True, hostname=hostname) - pp = Cleaner(conf, {'keywords': ['name', 'day']}, hostname) - result = pp._redact_line(line) - assert 'test1.abc.com' in result # hostname is not processed in _redact_line - assert '10.0.0.1' in result # IP is not processed in _redact_line - assert '20.1.4.7' in result # IP is not processed in _redact_line - assert 'name' not in result - assert 'day' not in result - assert 'keyword0' in result - assert 'keyword1' in result - - -@mark.parametrize("obfuscate", [True, False]) -def test_redact_patterns_exclude_regex(obfuscate): - conf = InsightsConfig(obfuscate=obfuscate) - arch = InsightsArchive(conf) - arch.create_archive_dir() - - # put something in the archive to redact - test_file = os.path.join(arch.archive_dir, 'test.file') - with open(test_file, 'w') as t: - t.write(test_file_data) - - rm_conf = {'patterns': {'regex': ['12.*4', '^abcd']}} - pp = Cleaner(conf, rm_conf) - pp.clean_file(test_file, []) - with open(test_file, 'r') as t: - data = [i.strip() for i in t.readlines()] - assert '1234' not in data - assert 'abcd' not in data - arch.delete_archive_dir() - - -@mark.parametrize("obfuscate", [True, False]) -def test_redact_file_empty(obfuscate): - conf = InsightsConfig(obfuscate=obfuscate) - arch = InsightsArchive(conf) - arch.create_archive_dir() - - # put something in the archive to redact - test_file = os.path.join(arch.archive_dir, 'test.file') - with open(test_file, 'w') as t: - t.write(test_file_data) - - rm_conf = {'patterns': {'regex': ['test', 'pwd', '12.*4', '^abcd']}} - pp = Cleaner(conf, rm_conf) - pp.clean_file(test_file) - # file is cleaned to empty, hence it was removed - assert not os.path.exists(test_file) - arch.delete_archive_dir() - - -@mark.parametrize("obfuscate", [True, False]) -def test_redact_patterns_exclude_no_regex(obfuscate): - conf = InsightsConfig(obfuscate=obfuscate) - arch = InsightsArchive(conf) - arch.create_archive_dir() - - # put something 
in the archive to redact - test_file = os.path.join(arch.archive_dir, 'test.file') - with open(test_file, 'w') as t: - t.write(test_file_data) - - rm_conf = {'patterns': ['1234', 'abcd']} - pp = Cleaner(conf, rm_conf) - pp.clean_file(test_file, []) - with open(test_file, 'r') as t: - data = [i.strip() for i in t.readlines()] - assert '1234' not in data - assert 'abcd' not in data - arch.delete_archive_dir() - - -@mark.parametrize("obfuscate", [True, False]) -def test_redact_patterns_exclude_empty(obfuscate): - conf = InsightsConfig(obfuscate=obfuscate) - arch = InsightsArchive(conf) - arch.create_archive_dir() - - # put something in the archive to redact - test_file = os.path.join(arch.archive_dir, 'test.file') - with open(test_file, 'w') as t: - t.write(test_file_data) - - rm_conf = {'patterns': []} - pp = Cleaner(conf, rm_conf) - pp.clean_file(test_file, []) - # file is not changed - with open(test_file, 'r') as t: - assert test_file_data == ''.join(t.readlines()) - arch.delete_archive_dir() - - -@mark.parametrize("obfuscate", [True, False]) -def test_redact_exclude_none(obfuscate): - conf = InsightsConfig(obfuscate=obfuscate) - arch = InsightsArchive(conf) - arch.create_archive_dir() - - # put something in the archive to redact - test_file = os.path.join(arch.archive_dir, 'test.file') - with open(test_file, 'w') as t: - t.write(test_file_data) - - pp = Cleaner(conf, None) - pp.clean_file(test_file, []) - # file is not changed - with open(test_file, 'r') as t: - assert test_file_data == ''.join(t.readlines()) - arch.delete_archive_dir() - - -@mark.parametrize(("line", "expected"), [ - ("test1.abc.com: it's myserver? what is yours?", None), - ("testabc: it's mykey? what is yours?", None), - ( - "testabc: it's my1key? what is yours?", - "testabc: it's my1key? 
what is yours?", - ), -]) -def test_redact_exclude_patterns(line, expected): - c = InsightsConfig() - pp = Cleaner(c, {'patterns': ['myserver', 'mykey']}) - actual = pp._redact_line(line) - assert actual == expected - - -@mark.parametrize(("line", "expected"), [ - ("test.abc.com: it's myserver? what is yours?", None), - ("testabc: it's mykey? what is yours?", None), - ("testabc: it's my1key? what is yours?", None), - ("test1: it's my-key? what is yours?", "test1: it's my-key? what is yours?"), -]) -def test_redact_patterns_regex(line, expected): - c = InsightsConfig() - pp = Cleaner(c, {'patterns': {'regex': ['myserver', r'my(\w*)key']}}) - actual = pp._redact_line(line) - assert actual == expected - - -@mark.parametrize(("line", "expected"), [ - ("test.abc.com: it's myserver? what is yours?", None), - ("testabc: it's mykey? what is yours?", None), - ("testabc: it's my1key? what is yours?", None), - ("test1: it's my-key? what is yours?", None), - ("test: it's my-key? what is yours?", "test: it's my-key? 
what is yours?"), -]) -def test_redact_patterns_posix_regex(line, expected): - c = InsightsConfig() - pp = Cleaner(c, {'patterns': {'regex': ['myserver', r'my(\w*)key', 'test[[:digit:]]']}}) - actual = pp._redact_line(line) - assert actual == expected - - -@mark.parametrize(("line", "expected"), [ - ("password: p@ss_W0rd ?", "password: ******** ?"), - ("password = p@ss_W0rd ?", "password = ******** ?"), - ("password=p@ss_W0-d", "password=********"), -]) -def test_redact_password(line, expected): - c = InsightsConfig() - pp = Cleaner(c, {'patterns': {'regex': ['myserver', r'my(\w*)key']}}) - actual = pp._redact_line(line) - assert actual == expected diff --git a/insights/tests/core/spec_cleaner/test_get_obfuscate_functions.py b/insights/tests/core/spec_cleaner/test_get_obfuscate_functions.py deleted file mode 100644 index 10cd4847ce..0000000000 --- a/insights/tests/core/spec_cleaner/test_get_obfuscate_functions.py +++ /dev/null @@ -1,33 +0,0 @@ -from insights.client.config import InsightsConfig -from insights.core.spec_cleaner import Cleaner - - -def test_get_obfuscate_functions_default_obfuscate_true(): - conf = InsightsConfig(obfuscate=True) - pp = Cleaner(conf, {}) - assert pp.get_obfuscate_functions() == [pp._sub_ip] - - conf = InsightsConfig(obfuscate=True, obfuscate_hostname=True) - pp = Cleaner(conf, {}) - assert pp.get_obfuscate_functions() == [pp._sub_ip, pp._sub_hostname] - - -def test_get_obfuscate_functions_default_obfuscate_false(): - conf = InsightsConfig(obfuscate=False) - pp = Cleaner(conf, {}) - assert pp.get_obfuscate_functions() == [] - - -def test_get_obfuscate_functions(): - conf = InsightsConfig(obfuscate=True) - pp = Cleaner(conf, {}) - assert pp.get_obfuscate_functions(filename='test') == [pp._sub_ip] - assert pp.get_obfuscate_functions(filename='netstat_-neopa') == [pp._sub_ip_netstat] - assert pp.get_obfuscate_functions(no_obfuscate=['ip']) == [] - - conf = InsightsConfig(obfuscate=True, obfuscate_hostname=True) - pp = Cleaner(conf, {}) - 
assert pp.get_obfuscate_functions(filename='test') == [pp._sub_ip, pp._sub_hostname] - assert pp.get_obfuscate_functions(filename='netstat_-neopa') == [pp._sub_ip_netstat, pp._sub_hostname] - assert pp.get_obfuscate_functions(no_obfuscate=['ip']) == [pp._sub_hostname] - assert pp.get_obfuscate_functions(no_obfuscate=['ip', 'hostname']) == [] diff --git a/insights/tests/specs/test_specs.py b/insights/tests/specs/test_specs.py index 8e1ec3f5bf..8838e0e283 100644 --- a/insights/tests/specs/test_specs.py +++ b/insights/tests/specs/test_specs.py @@ -8,6 +8,7 @@ from mock.mock import patch from insights import collect +from insights.cleaner import Cleaner from insights.client.archive import InsightsArchive from insights.client.config import InsightsConfig from insights.core import Parser, dr @@ -16,7 +17,6 @@ from insights.core.exceptions import ContentException from insights.core.filters import add_filter from insights.core.plugins import datasource -from insights.core.spec_cleaner import Cleaner from insights.core.spec_factory import ( DatasourceProvider, RegistryPoint, @@ -367,7 +367,7 @@ def test_exp_no_filters(): @pytest.mark.parametrize("obfuscate", [True, False]) -@patch('insights.core.spec_cleaner.Cleaner.generate_report', return_value=None) +@patch('insights.cleaner.Cleaner.generate_report', return_value=None) def test_specs_collect(gen, obfuscate): add_filter(Stuff.many_glob_filter, " ") add_filter(Stuff.many_foreach_exe_filter, " ") diff --git a/insights/tests/specs/test_specs_content_redaction_empty.py b/insights/tests/specs/test_specs_content_redaction_empty.py index aea182b02b..2c2182a25a 100644 --- a/insights/tests/specs/test_specs_content_redaction_empty.py +++ b/insights/tests/specs/test_specs_content_redaction_empty.py @@ -85,7 +85,7 @@ def teardown_function(func): dr.ENABLED = defaultdict(lambda: True) -@patch('insights.core.spec_cleaner.Cleaner.generate_report') +@patch('insights.cleaner.Cleaner.generate_report') def 
test_specs_ds_with_hn_collect(mock_fun): # Preparation manifest = collect.load_manifest(specs_manifest) diff --git a/insights/tests/specs/test_specs_filters.py b/insights/tests/specs/test_specs_filters.py index 7674e7b463..98e3c7e78c 100644 --- a/insights/tests/specs/test_specs_filters.py +++ b/insights/tests/specs/test_specs_filters.py @@ -13,7 +13,7 @@ from insights.core.context import HostContext from insights.core.filters import add_filter from insights.core.plugins import datasource -from insights.core.spec_cleaner import Cleaner +from insights.cleaner import Cleaner from insights.core.spec_factory import ( RegistryPoint, SpecSet, @@ -289,7 +289,7 @@ def test_exp_no_filters(): @pytest.mark.parametrize("obfuscate", [True, False]) -@patch('insights.core.spec_cleaner.Cleaner.generate_report', return_value=None) +@patch('insights.cleaner.Cleaner.generate_report', return_value=None) def test_specs_filters_collect(gen, obfuscate): add_filter(Stuff.many_glob_filter, " ") add_filter(Stuff.many_foreach_exe_filter, " ") diff --git a/insights/tests/specs/test_specs_runtime_ds_obfuscation.py b/insights/tests/specs/test_specs_runtime_ds_obfuscation.py index f3aef1e83d..3437bce08a 100644 --- a/insights/tests/specs/test_specs_runtime_ds_obfuscation.py +++ b/insights/tests/specs/test_specs_runtime_ds_obfuscation.py @@ -112,7 +112,7 @@ def teardown_function(func): @pytest.mark.parametrize("obfuscate", [True, False]) -@patch('insights.core.spec_cleaner.Cleaner.generate_report') +@patch('insights.cleaner.Cleaner.generate_report') def test_specs_ds_with_hn_collect(mock_fun, obfuscate): # Preparation manifest = collect.load_manifest(specs_manifest) diff --git a/insights/tests/specs/test_specs_save_as.py b/insights/tests/specs/test_specs_save_as.py index 1226104334..8d67f1f6ea 100644 --- a/insights/tests/specs/test_specs_save_as.py +++ b/insights/tests/specs/test_specs_save_as.py @@ -13,7 +13,7 @@ from insights.core.context import HostContext from insights.core.filters import 
add_filter from insights.core.plugins import datasource -from insights.core.spec_cleaner import Cleaner +from insights.cleaner import Cleaner from insights.core.spec_factory import ( RawFileProvider, RegistryPoint, @@ -206,7 +206,7 @@ def test_specs_save_as_no_collect(): @mark.parametrize("obfuscate", [True, False]) -@patch('insights.core.spec_cleaner.Cleaner.generate_report', Mock()) +@patch('insights.cleaner.Cleaner.generate_report', Mock()) def test_specs_save_as_collect(obfuscate): add_filter(Stuff.smpl_cmd_w_filter, " hello ") add_filter(Stuff.smpl_file_w_filter, "def test") diff --git a/insights/tests/specs/test_specs_special_content.py b/insights/tests/specs/test_specs_special_content.py index 149af24916..bd93a6b176 100644 --- a/insights/tests/specs/test_specs_special_content.py +++ b/insights/tests/specs/test_specs_special_content.py @@ -89,7 +89,7 @@ def teardown_function(func): @pytest.mark.parametrize("obfuscate", [True, False]) -@patch('insights.core.spec_cleaner.Cleaner.generate_report') +@patch('insights.cleaner.Cleaner.generate_report') def test_specs_special_content_collect(report, obfuscate): # Preparation manifest = collect.load_manifest(specs_manifest)