From eac8230d261bc12f86a6b0b3b15d57cb3022ce12 Mon Sep 17 00:00:00 2001 From: Ryan Blakley Date: Tue, 24 Jan 2023 13:27:25 -0500 Subject: [PATCH] feat: Capture blacklisted specs inside archive * Capture the name of blacklisted specs, and dump them to a file in the archive. * Added parser and spec to parse the blacklisted specs and use in rules. Signed-off-by: Ryan Blakley --- docs/shared_parsers_catalog/blacklisted.rst | 3 ++ insights/client/data_collector.py | 11 ++++++ insights/collect.py | 43 +++++++++++++++++++++ insights/core/blacklist.py | 1 + insights/core/dr.py | 7 +++- insights/core/exceptions.py | 7 ++++ insights/core/spec_factory.py | 6 +-- insights/parsers/blacklisted.py | 30 ++++++++++++++ insights/specs/__init__.py | 1 + insights/specs/insights_archive.py | 1 + insights/tests/parsers/test_blacklist.py | 29 ++++++++++++++ 11 files changed, 135 insertions(+), 4 deletions(-) create mode 100644 docs/shared_parsers_catalog/blacklisted.rst create mode 100644 insights/parsers/blacklisted.py create mode 100644 insights/tests/parsers/test_blacklist.py diff --git a/docs/shared_parsers_catalog/blacklisted.rst b/docs/shared_parsers_catalog/blacklisted.rst new file mode 100644 index 0000000000..1f65ea50c0 --- /dev/null +++ b/docs/shared_parsers_catalog/blacklisted.rst @@ -0,0 +1,3 @@ +.. 
automodule:: insights.parsers.blacklisted + :members: + :show-inheritance: diff --git a/insights/client/data_collector.py b/insights/client/data_collector.py index 2688d0f4e7..91dc54a491 100644 --- a/insights/client/data_collector.py +++ b/insights/client/data_collector.py @@ -15,6 +15,7 @@ from subprocess import Popen, PIPE, STDOUT from tempfile import NamedTemporaryFile +from insights.core.blacklist import BLACKLISTED_SPECS from insights.util import mangle from ..contrib.soscleaner import SOSCleaner from .utilities import _expand_paths, get_version_info, systemd_notify_init_thread, get_tags @@ -132,6 +133,10 @@ def _write_blacklist_report(self, blacklist_report): self.archive.add_metadata_to_archive( json.dumps(blacklist_report), '/blacklist_report') + if BLACKLISTED_SPECS: + self.archive.add_metadata_to_archive( + json.dumps({"specs": BLACKLISTED_SPECS}), '/blacklisted_specs.txt') + def _write_egg_release(self): logger.debug("Writing egg release to archive...") egg_release = '' @@ -327,11 +332,13 @@ def run_collection(self, conf, rm_conf, branch_info, blacklist_report): 'insights_commands', mangle.mangle_command(c['command'])) if c['command'] in rm_commands or c.get('symbolic_name') in rm_commands: logger.warn("WARNING: Skipping command %s", c['command']) + BLACKLISTED_SPECS.append(c['symbolic_name']) elif self.mountpoint == "/" or c.get("image"): cmd_specs = self._parse_command_spec(c, conf['pre_commands']) for s in cmd_specs: if s['command'] in rm_commands: logger.warn("WARNING: Skipping command %s", s['command']) + BLACKLISTED_SPECS.append(s['symbolic_name']) continue cmd_spec = InsightsCommand(self.config, s, self.mountpoint) self.archive.add_to_archive(cmd_spec) @@ -343,12 +350,14 @@ def run_collection(self, conf, rm_conf, branch_info, blacklist_report): for f in conf['files']: if f['file'] in rm_files or f.get('symbolic_name') in rm_files: logger.warn("WARNING: Skipping file %s", f['file']) + BLACKLISTED_SPECS.append(f['symbolic_name']) else: file_specs = 
self._parse_file_spec(f) for s in file_specs: # filter files post-wildcard parsing if s['file'] in rm_conf.get('files', []): logger.warn("WARNING: Skipping file %s", s['file']) + BLACKLISTED_SPECS.append(s['symbolic_name']) else: file_spec = InsightsFile(s, self.mountpoint) self.archive.add_to_archive(file_spec) @@ -361,11 +370,13 @@ def run_collection(self, conf, rm_conf, branch_info, blacklist_report): if g.get('symbolic_name') in rm_files: # ignore glob via symbolic name logger.warn("WARNING: Skipping file %s", g['glob']) + BLACKLISTED_SPECS.append(g['symbolic_name']) else: glob_specs = self._parse_glob_spec(g) for g in glob_specs: if g['file'] in rm_files: logger.warn("WARNING: Skipping file %s", g['file']) + BLACKLISTED_SPECS.append(g['symbolic_name']) else: glob_spec = InsightsFile(g, self.mountpoint) self.archive.add_to_archive(glob_spec) diff --git a/insights/collect.py b/insights/collect.py index 287fb9a63c..2888744931 100755 --- a/insights/collect.py +++ b/insights/collect.py @@ -9,6 +9,7 @@ """ from __future__ import print_function import argparse +import json import logging import os import sys @@ -19,6 +20,7 @@ from insights import apply_configs, apply_default_enabled, get_pool from insights.core import blacklist, dr, filters +from insights.core.blacklist import BLACKLISTED_SPECS from insights.core.exceptions import CalledProcessError from insights.core.serde import Hydration from insights.util import fs @@ -402,6 +404,7 @@ def collect(manifest=default_manifest, tmp_path=None, compress=False, rm_conf=No log.warning('WARNING: Unknown component in blacklist: %s' % component) else: dr.set_enabled(component, enabled=False) + BLACKLISTED_SPECS.append(component.split('.')[-1]) log.warning('WARNING: Skipping component: %s', component) to_persist = get_to_persist(client.get("persist", set())) @@ -438,6 +441,11 @@ def collect(manifest=default_manifest, tmp_path=None, compress=False, rm_conf=No broker.add_observer(h.make_persister(to_persist)) 
dr.run_all(broker=broker, pool=pool) + if BLACKLISTED_SPECS: + _write_out_blacklisted_specs(output_path) + # Delete the list so the specs aren't written again by the client. + del BLACKLISTED_SPECS[:] + collect_errors = _parse_broker_exceptions(broker, EXCEPTIONS_TO_REPORT) if compress:
@@ -473,6 +481,41 @@ def _parse_broker_exceptions(broker, exceptions_to_report):
     return errors
 
 
+def _write_out_blacklisted_specs(output_path):
+    """
+    Write the blacklisted specs to blacklisted_specs.txt and, when the output
+    directory has a ``meta_data`` directory, a meta-data file describing it.
+    That way it can be loaded when the archive is processed.
+
+    Args:
+        output_path (str): Path of the output directory.
+    """
+    meta_dir = os.path.join(output_path, "meta_data")
+    has_meta_data = os.path.exists(meta_dir)
+    output_path_root = os.path.join(output_path, "data") if has_meta_data else output_path
+
+    with open(os.path.join(output_path_root, "blacklisted_specs.txt"), "w") as of:
+        json.dump({"specs": BLACKLISTED_SPECS}, of)
+
+    if not has_meta_data:
+        # No meta_data directory: opening a file below it would raise.
+        return
+
+    doc = {
+        "name": "insights.specs.Specs.blacklisted_specs",
+        "exec_time": 0.0,
+        "errors": [],
+        "results": {
+            "type": "insights.core.spec_factory.DatasourceProvider",
+            "object": {"relative_path": "blacklisted_specs.txt"}
+        },
+        "ser_time": 0.0
+    }
+
+    with open(os.path.join(meta_dir, "insights.specs.Specs.blacklisted_specs"), "w") as of:
+        json.dump(doc, of)
+
+
 def main():
     # Remove command line args so that they are not parsed by any called modules
     # The main fxn is only invoked as a cli, if calling from another cli then
diff --git a/insights/core/blacklist.py b/insights/core/blacklist.py index 64fe1f4558..271e28e08b 100644 --- a/insights/core/blacklist.py +++ b/insights/core/blacklist.py @@ -1,6 +1,7 @@ import re +BLACKLISTED_SPECS = [] _FILE_FILTERS = set() _COMMAND_FILTERS = set() _PATTERN_FILTERS = set() diff --git a/insights/core/dr.py b/insights/core/dr.py index ee8ff64744..fc2acf08d4 100644 --- a/insights/core/dr.py +++ b/insights/core/dr.py @@ -64,7
+64,8 @@ def add(a, b): from insights.contrib import importlib from insights.contrib.toposort import toposort_flatten -from insights.core.exceptions import MissingRequirements, ParseException, SkipComponent +from insights.core.blacklist import BLACKLISTED_SPECS +from insights.core.exceptions import BlacklistedSpec, MissingRequirements, ParseException, SkipComponent from insights.util import defaults, enum, KeyPassingDefaultDict log = logging.getLogger(__name__) @@ -1038,6 +1039,10 @@ def run_components(ordered_components, components, broker): log.info("Trying %s" % get_name(component)) result = DELEGATES[component].process(broker) broker[component] = result + except BlacklistedSpec as bs: + for x in get_registry_points(component): + BLACKLISTED_SPECS.append(str(x).split('.')[-1]) + broker.add_exception(component, bs, traceback.format_exc()) except MissingRequirements as mr: if log.isEnabledFor(logging.DEBUG): name = get_name(component) diff --git a/insights/core/exceptions.py b/insights/core/exceptions.py index aea9a8dfa4..2e155ba6be 100644 --- a/insights/core/exceptions.py +++ b/insights/core/exceptions.py @@ -1,6 +1,13 @@ from insights.util import deprecated +class BlacklistedSpec(Exception): + """ + Exception to be thrown when a blacklisted spec is found. + """ + pass + + class CalledProcessError(Exception): """ Raised if call fails. 
diff --git a/insights/core/spec_factory.py b/insights/core/spec_factory.py index 5d433f9230..3103e6d3e9 100644 --- a/insights/core/spec_factory.py +++ b/insights/core/spec_factory.py @@ -14,7 +14,7 @@ from insights.core import blacklist, dr from insights.core.context import ExecutionContext, FSRoots, HostContext -from insights.core.exceptions import ContentException, SkipComponent +from insights.core.exceptions import BlacklistedSpec, ContentException, SkipComponent from insights.core.filters import _add_filter, get_filters from insights.core.plugins import component, datasource, is_datasource from insights.core.serde import deserializer, serializer @@ -178,7 +178,7 @@ def __init__(self, relative_path, root="/", ds=None, ctx=None): def validate(self): if not blacklist.allow_file("/" + self.relative_path): log.warning("WARNING: Skipping file %s", "/" + self.relative_path) - raise SkipComponent() + raise BlacklistedSpec() if not os.path.exists(self.path): raise ContentException("%s does not exist." % self.path) @@ -333,7 +333,7 @@ def _misc_settings(self): def validate(self): if not blacklist.allow_command(self.cmd): log.warning("WARNING: Skipping command %s", self.cmd) - raise SkipComponent() + raise BlacklistedSpec() cmd = shlex.split(self.cmd)[0] if not which(cmd, env=self._env): diff --git a/insights/parsers/blacklisted.py b/insights/parsers/blacklisted.py new file mode 100644 index 0000000000..f82f38da6a --- /dev/null +++ b/insights/parsers/blacklisted.py @@ -0,0 +1,30 @@ +""" +BlacklistedSpecs - File ``blacklisted_specs.txt`` +================================================= +""" +from insights.core import JSONParser +from insights.core.plugins import parser +from insights.specs import Specs + + +@parser(Specs.blacklisted_specs) +class BlacklistedSpecs(JSONParser): + """ + Parses the blacklisted_specs.txt file generated on archive creation. 
+ + Typical output:: + "{"specs": ["insights.specs.default.DefaultSpecs.dmesg", "insights.specs.default.DefaultSpecs.fstab"]}" + + Attributes: + specs (list): List of blacklisted specs. + + Examples: + >>> type(specs) + <class 'insights.parsers.blacklisted.BlacklistedSpecs'> + >>> result = ['insights.specs.default.DefaultSpecs.dmesg', 'insights.specs.default.DefaultSpecs.fstab'] + >>> specs.specs == result + True + """ + @property + def specs(self): + return self.data['specs'] diff --git a/insights/specs/__init__.py b/insights/specs/__init__.py index 20f28bd9c4..3b3a29ebbd 100644 --- a/insights/specs/__init__.py +++ b/insights/specs/__init__.py @@ -25,6 +25,7 @@ class Specs(SpecSet): azure_instance_type = RegistryPoint() bdi_read_ahead_kb = RegistryPoint(multi_output=True) bios_uuid = RegistryPoint() + blacklisted_specs = RegistryPoint() blkid = RegistryPoint() bond = RegistryPoint(multi_output=True) bond_dynamic_lb = RegistryPoint(multi_output=True) diff --git a/insights/specs/insights_archive.py b/insights/specs/insights_archive.py index f3d51ead0e..5a9059e60f 100644 --- a/insights/specs/insights_archive.py +++ b/insights/specs/insights_archive.py @@ -24,6 +24,7 @@ class InsightsArchiveSpecs(Specs): azure_instance_type = simple_file("insights_commands/python_-m_insights.tools.cat_--no-header_azure_instance_type") azure_instance_plan = simple_file("insights_commands/python_-m_insights.tools.cat_--no-header_azure_instance_plan") bios_uuid = simple_file("insights_commands/dmidecode_-s_system-uuid") + blacklisted_specs = simple_file("blacklisted_specs.txt") blkid = simple_file("insights_commands/blkid_-c_.dev.null") brctl_show = simple_file("insights_commands/brctl_show") ceph_df_detail = first_file(["insights_commands/ceph_df_detail_-f_json-pretty", "insights_commands/ceph_df_detail_-f_json"]) diff --git a/insights/tests/parsers/test_blacklist.py b/insights/tests/parsers/test_blacklist.py new file mode 100644 index 0000000000..67f6009117 --- /dev/null +++ b/insights/tests/parsers/test_blacklist.py @@ -0,0 +1,29 @@
+import doctest
+import pytest
+
+from insights.core.exceptions import SkipComponent
+from insights.parsers import blacklisted
+from insights.parsers.blacklisted import BlacklistedSpecs
+from insights.tests import context_wrap
+
+
+SPECS = '{"specs": ["insights.specs.default.DefaultSpecs.dmesg", "insights.specs.default.DefaultSpecs.fstab"]}'
+
+
+def test_blacklisted_doc_examples():
+    env = {
+        "specs": BlacklistedSpecs(context_wrap(SPECS)),
+    }
+    failed, total = doctest.testmod(blacklisted, globs=env)
+    assert failed == 0
+
+
+def test_skip():
+    with pytest.raises(SkipComponent) as ex:
+        BlacklistedSpecs(context_wrap(""))
+    assert "Empty output." in str(ex.value)  # the raised exception, checked after the block exits
+
+
+def test_blacklist_specs():
+    bs = BlacklistedSpecs(context_wrap(SPECS))
+    assert bs.specs[0] == "insights.specs.default.DefaultSpecs.dmesg"