Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: enhance add_filter to support the max matched lines #4303

Merged
merged 2 commits into from
Jan 3, 2025
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 41 additions & 29 deletions insights/core/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,16 +48,12 @@
from insights.util import parse_bool

_CACHE = {}
FILTERS = defaultdict(set)
FILTERS = defaultdict(dict)
ENABLED = parse_bool(os.environ.get("INSIGHTS_FILTERS_ENABLED"), default=True)
MATCH_COUNT = 10000
MAX_MATCH = 10000


# TODO:
# - support specifying the max match number of filtered lines
# add_filter(Messages, "Such an Error", 10)
# def add_filter(component, patterns, match_count=MATCH_COUNT):
def add_filter(component, patterns):
def add_filter(component, patterns, max_match=MAX_MATCH):
"""
Add a filter or list of filters to a component. When the component is
a datasource, the filter will be directly added to that datasource.
Expand All @@ -71,8 +67,26 @@ def add_filter(component, patterns):
parser or combiner.
patterns (str, [str]): A string, list of strings, or set of strings to
add to the datasource's filters.
max_match (int): The maximum number of matched lines to keep when
filtering. Defaults to MAX_MATCH.
JoySnow marked this conversation as resolved.
Show resolved Hide resolved
"""

def get_dependency_datasources(comp):
    """Return the set of nearest datasource dependencies of *comp*.

    Recursion stops at the first datasource met on each dependency
    path: a datasource component is returned as-is, and its own
    dependencies are not traversed further.
    """
    if plugins.is_datasource(comp):
        return {comp}
    found = set()
    for dependency in dr.get_dependencies(comp):
        found |= get_dependency_datasources(dependency)
    return found

def none_max(a, b):
    """Return the larger of *a* and *b*, where ``None`` means "no value".

    If exactly one argument is ``None`` the other is returned; if both
    are ``None`` the result is ``None``.
    """
    if b is None:
        return a
    if a is None:
        return b
    return max(a, b)

def max_matchs(da, db):
    """Merge two ``{pattern: max_match}`` dicts, keeping the larger
    count for every key present in either dict.
    """
    all_keys = set(da) | set(db)
    return {key: none_max(da.get(key), db.get(key)) for key in all_keys}

def inner(comp, patterns):
if comp in _CACHE:
del _CACHE[comp]
Expand All @@ -82,25 +96,16 @@ def inner(comp, patterns):
raise TypeError("Filter patterns must be of type string, list, or set.")

if isinstance(patterns, six.string_types):
patterns = set([patterns])
patterns = {patterns: max_match}
elif isinstance(patterns, list):
patterns = set(patterns)
patterns = dict((pt, max_match) for pt in patterns)
# here patterns is a dict

for pat in patterns:
if not pat:
raise Exception("Filter patterns must not be empty.")

FILTERS[comp] |= patterns

def get_dependency_datasources(comp):
"""Get (all) the first depended datasource"""
dep_ds = set()
if plugins.is_datasource(comp):
dep_ds.add(comp)
return dep_ds
for dep in dr.get_dependencies(comp):
dep_ds.update(get_dependency_datasources(dep))
return dep_ds
FILTERS[comp].update(max_matchs(FILTERS[comp], patterns))

if not plugins.is_datasource(component):
deps = get_dependency_datasources(component)
Expand All @@ -127,7 +132,7 @@ def get_dependency_datasources(comp):
_add_filter = add_filter


def get_filters(component):
def get_filters(component, with_matches=False):
"""
Get the set of filters for the given datasource.

Expand All @@ -143,13 +148,19 @@ def get_filters(component):

Args:
component (a datasource): The target datasource
with_matches (boolean): Whether to also return the max match count
for each filter. False by default.

Returns:
set: The set of filters defined for the datasource
(set or dict): when `with_matches=False`, returns the set of filters
defined for the datasource only.
when `with_matches=True`, returns filters defined for
the datasource with the max match count specified by
`add_filter`.
"""

def inner(c, filters=None):
filters = filters or set()
filters = filters or dict()

if hasattr(c, 'filterable') and c.filterable is False:
return filters
Expand All @@ -161,20 +172,21 @@ def inner(c, filters=None):
return filters

if c in FILTERS:
filters |= FILTERS[c]
filters.update(FILTERS[c])

for d in dr.get_dependents(c):
filters |= inner(d, filters)
filters.update(inner(d, filters))

return filters

if not component:
# No filters for nothing
return set()
return dict() if with_matches else set()

if component not in _CACHE:
_CACHE[component] = inner(component)

return _CACHE[component]
return _CACHE[component] if with_matches else set(_CACHE[component].keys())


def apply_filters(target, lines):
Expand Down Expand Up @@ -202,7 +214,7 @@ def loads(string):
"""Loads the filters dictionary given a string."""
d = _loads(string)
for k, v in d.items():
FILTERS[dr.get_component(k) or k] = set(v)
FILTERS[dr.get_component(k) or k] = v


def load(stream=None):
Expand All @@ -222,7 +234,7 @@ def dumps():
"""Returns a string representation of the sorted FILTERS dictionary."""
d = {}
for k, v in FILTERS.items():
d[dr.get_name(k)] = sorted(v)
d[dr.get_name(k)] = dict(sorted(v.items()))
return _dumps(d)


Expand Down
4 changes: 3 additions & 1 deletion insights/core/spec_cleaner.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
Obfuscate the IP or Hostname appears in the spec content according to the
specs native requirement and user configuration.

- Filtering
Filter line as per the allow list got from the "filters.yaml"
"""

import logging
Expand Down Expand Up @@ -360,7 +362,7 @@ def _filter_line_per_allowlist(self, line, allow_info):
for a_key in list(allow_info.keys()):
# keep line when any filter match
# FIXME:
# Considering performance, din't handle multiple filters in one same line
# Considering performance, didn't handle multiple filters in one same line
if a_key in line:
allow_info[a_key] -= 1
# stop checking it when enough lines contain the key were found
Expand Down
8 changes: 4 additions & 4 deletions insights/core/spec_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def __init__(self):
self._content = None
self._exception = None
self._filterable = False
self._filters = set()
self._filters = dict()

def load(self):
raise NotImplementedError()
Expand Down Expand Up @@ -95,7 +95,7 @@ def _clean_content(self):
allowlist = None
if self._filterable:
cleans.append("Filter")
allowlist = dict((f, filters.MATCH_COUNT) for f in self._filters)
allowlist = self._filters
# Cleaning - Entry
if cleans:
log.debug("Cleaning (%s) %s", "/".join(cleans), self.relative_path)
Expand Down Expand Up @@ -208,7 +208,7 @@ def __init__(self, relative_path, root="/", save_as=None, ds=None, ctx=None, cle
if self.ds and filters.ENABLED
else False
)
self._filters = filters.get_filters(self.ds) if self.ds else set()
self._filters = filters.get_filters(self.ds, True) if self.ds else set()

self.validate()

Expand Down Expand Up @@ -361,7 +361,7 @@ def __init__(
if self.ds and filters.ENABLED
else False
)
self._filters = filters.get_filters(self.ds) if self.ds else set()
self._filters = filters.get_filters(self.ds, True) if self.ds else set()

self.validate()

Expand Down
73 changes: 45 additions & 28 deletions insights/tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,14 @@

def _intercept_add_filter(func):
@wraps(func)
def inner(component, pattern):
ret = add_filter(component, pattern)
def inner(component, pattern, max_match=filters.MAX_MATCH):
ret = add_filter(component, pattern, max_match=max_match)
calling_module = inspect.stack()[1][0].f_globals.get("__name__")
ADDED_FILTERS[calling_module] |= set(r for r in dr.get_registry_points(component) if r.filterable)
ADDED_FILTERS[calling_module] |= set(
r for r in dr.get_registry_points(component) if r.filterable
)
return ret

return inner


Expand All @@ -51,6 +54,7 @@ def inner(ds, pattern):
calling_module = inspect.stack()[1][0].f_globals.get("__name__")
ADDED_FILTERS[calling_module].add(ds)
return ret

return inner


Expand Down Expand Up @@ -92,8 +96,11 @@ def _beautify_deep_compare_diff(result, expected):
diff.append('\tkey "{0}" not in Result;'.format(k))
for k in common_keys:
if not eq(result[k], expected[k]):
diff.append('\tkey "{0}" unequal values:\n\t\tExpected: {1}\n\t\tResult : {2}'.format(
k, expected[k], result[k]))
diff.append(
'\tkey "{0}" unequal values:\n\t\tExpected: {1}\n\t\tResult : {2}'.format(
k, expected[k], result[k]
)
)
if not diff:
diff.append('\tUnrecognized unequal values in result layer one;')

Expand All @@ -118,7 +125,7 @@ def deep_compare(result, expected):
# This case ensures that when rules return a make_none() response, all of the older
# CI tests that are looking for None instead of make_none() will still pass
if result is None or (isinstance(result, dict) and result.get("type") == "none"):
assert (expected is None or expected == MAKE_NONE_RESULT), result
assert expected is None or expected == MAKE_NONE_RESULT, result
return

if isinstance(result, dict) and expected is None:
Expand Down Expand Up @@ -150,12 +157,11 @@ def run_input_data(component, input_data, store_skips=False):
'CloudInstance': ['insights.parsers.subscription_manager.SubscriptionManagerFacts'],
'CloudProvider': ['insights.parsers.rhsm_conf.RHSMConf'],
'OSRelease': ['insights.parsers.dmesg.DmesgLineList'],
'Sap': ['insights.parsers.saphostctrl.SAPHostCtrlInstances']
'Sap': ['insights.parsers.saphostctrl.SAPHostCtrlInstances'],
}


def run_test(component, input_data,
expected=_UNDEFINED, return_make_none=False, do_filter=True):
def run_test(component, input_data, expected=_UNDEFINED, return_make_none=False, do_filter=True):
"""
Arguments:
component: The insights component need to test.
Expand All @@ -165,6 +171,7 @@ def run_test(component, input_data,
do_filter: Does need to check dependency spec filter warning?
- it's not required to check the filters for sosreport
"""

def get_filtered_specs(module):
filtered = set()
mods = dir(importlib.import_module(module))
Expand All @@ -183,7 +190,9 @@ def get_filtered_specs(module):
rps = dr.get_registry_points(component)
filtered = get_filtered_specs(mod)
filterable = set(d for d in rps if dr.get_delegate(d).filterable) - filtered
missing_filters = filterable - ADDED_FILTERS.get(mod, set()) - ADDED_FILTERS.get(sup_mod, set())
missing_filters = (
filterable - ADDED_FILTERS.get(mod, set()) - ADDED_FILTERS.get(sup_mod, set())
)
if missing_filters:
names = [dr.get_name(m) for m in missing_filters]
msg = "%s must add filters to %s"
Expand All @@ -204,16 +213,18 @@ def integrate(input_data, component):
return run_test(component, input_data)


def context_wrap(lines,
path="path",
hostname=DEFAULT_HOSTNAME,
release=DEFAULT_RELEASE,
version="-1.-1",
machine_id="machine_id",
strip=True,
split=True,
filtered_spec=None,
**kwargs):
def context_wrap(
lines,
path="path",
hostname=DEFAULT_HOSTNAME,
release=DEFAULT_RELEASE,
version="-1.-1",
machine_id="machine_id",
strip=True,
split=True,
filtered_spec=None,
**kwargs
):
if isinstance(lines, six.string_types):
if strip:
lines = lines.strip()
Expand All @@ -223,10 +234,16 @@ def context_wrap(lines,
if filtered_spec is not None and filtered_spec in filters.FILTERS:
lines = [l for l in lines if any([f in l for f in filters.FILTERS[filtered_spec]])]

return Context(content=lines,
path=path, hostname=hostname,
release=release, version=version.split("."),
machine_id=machine_id, relative_path=path, **kwargs)
return Context(
content=lines,
path=path,
hostname=hostname,
release=release,
version=version.split("."),
machine_id=machine_id,
relative_path=path,
**kwargs
)


input_data_cache = {}
Expand All @@ -236,10 +253,7 @@ def context_wrap(lines,


def create_metadata(system_id, product):
ctx_metadata = {
"system_id": system_id,
"links": []
}
ctx_metadata = {"system_id": system_id, "links": []}
ctx_metadata["type"] = product.role
ctx_metadata["product"] = product.__class__.__name__
return json.dumps(ctx_metadata)
Expand All @@ -265,6 +279,7 @@ class InputData(object):
contain the specified value in the context.path field. This is useful for
testing pattern-like file parsers.
"""

def __init__(self, name=None, hostname=None):
cnt = input_data_cache.get(name, 0)
self.name = "{0}-{1:0>5}".format(name, cnt)
Expand Down Expand Up @@ -421,6 +436,7 @@ def archive_provider(component, test_func=deep_compare, stride=1):

[1] insights.tests.deep_compare()
"""

def _wrap(func):
@six.wraps(func)
def __wrap(stride=stride):
Expand All @@ -430,4 +446,5 @@ def __wrap(stride=stride):
__wrap.stride = stride
ARCHIVE_GENERATORS.append(__wrap)
return __wrap

return _wrap
Loading
Loading