From 2ffede6f42bd56b183bfac61056d05954fa22c72 Mon Sep 17 00:00:00 2001 From: Roman Babenko Date: Mon, 5 Aug 2024 16:24:40 +0300 Subject: [PATCH] softreset --- credsweeper/filters/__init__.py | 1 + credsweeper/filters/group/group.py | 2 +- .../filters/value_base64_part_check.py | 56 +++++++++++++++++++ credsweeper/rules/config.yaml | 4 +- credsweeper/scanner/scan_type/scan_type.py | 33 ++++++----- tests/filters/test_value_base64_part_check.py | 33 +++++++++++ tests/samples/test.html | 2 +- 7 files changed, 111 insertions(+), 20 deletions(-) create mode 100644 credsweeper/filters/value_base64_part_check.py create mode 100644 tests/filters/test_value_base64_part_check.py diff --git a/credsweeper/filters/__init__.py b/credsweeper/filters/__init__.py index 66de37b97..986728d22 100644 --- a/credsweeper/filters/__init__.py +++ b/credsweeper/filters/__init__.py @@ -9,6 +9,7 @@ from credsweeper.filters.value_base64_data_check import ValueBase64DataCheck from credsweeper.filters.value_base64_encoded_pem_check import ValueBase64EncodedPem from credsweeper.filters.value_base64_key_check import ValueBase64KeyCheck +from credsweeper.filters.value_base64_part_check import ValueBase64PartCheck from credsweeper.filters.value_blocklist_check import ValueBlocklistCheck from credsweeper.filters.value_camel_case_check import ValueCamelCaseCheck from credsweeper.filters.value_couple_keyword_check import ValueCoupleKeywordCheck diff --git a/credsweeper/filters/group/group.py b/credsweeper/filters/group/group.py index 6ee25387d..7f1bc3c6a 100644 --- a/credsweeper/filters/group/group.py +++ b/credsweeper/filters/group/group.py @@ -60,5 +60,5 @@ def get_pattern_base_filters(config: Config) -> List[Filter]: return [ # LineSpecificKeyCheck(), # ValuePatternCheck(config), # - ValuePatternLengthCheck(config) + ValuePatternLengthCheck(config), # ] diff --git a/credsweeper/filters/value_base64_part_check.py b/credsweeper/filters/value_base64_part_check.py new file mode 100644 index 000000000..d41a7ed8c --- /dev/null +++ b/credsweeper/filters/value_base64_part_check.py @@ -0,0 +1,56 @@ +import contextlib +import statistics + +from credsweeper.common.constants import Chars +from credsweeper.config import Config +from credsweeper.credentials import LineData +from credsweeper.file_handler.analysis_target import AnalysisTarget +from credsweeper.filters import Filter +from credsweeper.utils import Util + + +class ValueBase64PartCheck(Filter): + """ + Check that candidate is NOT a part of base64 long line + """ + + def __init__(self, config: Config = None) -> None: + pass + + def run(self, line_data: LineData, target: AnalysisTarget) -> bool: + """Run filter checks on received weird base64 token which must be a random string + + Args: + line_data: credential candidate data + target: multiline target from which line data was obtained + + Return: + True, when need to filter candidate and False if left + + """ + + with contextlib.suppress(Exception): + if line_data.value_start and '/' == line_data.line[line_data.value_start - 1]: + if '-' in line_data.value or '_' in line_data.value: + # the value contains url-safe chars, so '/' is a delimiter + return False + value_entropy = Util.get_shannon_entropy(line_data.value, Chars.BASE64STD_CHARS.value) + left_start = line_data.value_start - len(line_data.value) + if 0 > left_start: + left_start = 0 + left_entropy = Util.get_shannon_entropy(line_data.line[left_start:line_data.value_start], + Chars.BASE64STD_CHARS.value) + right_end = line_data.value_end + len(line_data.value) + if len(line_data.line) < right_end: + right_end = len(line_data.line) + right_entropy = Util.get_shannon_entropy(line_data.line[line_data.value_end:right_end], + Chars.BASE64STD_CHARS.value) + data = [value_entropy, left_entropy, right_entropy] + avg = statistics.mean(data) + stdev = statistics.stdev(data, avg) + avg_min = avg - stdev + if avg_min < left_entropy and avg_min < right_entropy: + # high entropy of bound parts looks like a part of base64 long line + return True + + return False diff --git a/credsweeper/rules/config.yaml b/credsweeper/rules/config.yaml index fda7112eb..3b51f6507 100644 --- a/credsweeper/rules/config.yaml +++ b/credsweeper/rules/config.yaml @@ -233,7 +233,9 @@ type: pattern values: - (?EAA[0-9A-Za-z]{80,800}) - filter_type: GeneralPattern + filter_type: + - ValuePatternCheck + - ValueBase64PartCheck required_substrings: - EAA min_line_len: 80 diff --git a/credsweeper/scanner/scan_type/scan_type.py b/credsweeper/scanner/scan_type/scan_type.py index 8a95df687..ffcec526c 100644 --- a/credsweeper/scanner/scan_type/scan_type.py +++ b/credsweeper/scanner/scan_type/scan_type.py @@ -164,24 +164,23 @@ def _get_candidates(cls, config: Config, rule: Rule, target: AnalysisTarget) -> if config.exclude_lines and target.line_strip in config.exclude_lines: return candidates - line_data_list = cls.get_line_data_list(config=config, - target=target, - pattern=rule.patterns[0], - filters=rule.filters) - - for line_data in line_data_list: - if config.exclude_values and line_data.value.strip() in config.exclude_values: - continue - - candidate = Candidate([line_data], rule.patterns, rule.rule_name, rule.severity, config, rule.validations, - rule.use_ml, rule.confidence) - # single pattern with multiple values means all the patterns must matched in target - if 1 < len(rule.patterns) and rule.rule_type in (RuleType.PATTERN, RuleType.KEYWORD): - # additional check whether all patterns match - if not cls._aux_scan(config, rule, target, candidate): - # cannot find secondary values for the candidate + if line_data_list := cls.get_line_data_list(config=config, + target=target, + pattern=rule.patterns[0], + filters=rule.filters): + for line_data in line_data_list: + if config.exclude_values and line_data.value.strip() in config.exclude_values: continue - candidates.append(candidate) + + candidate = Candidate([line_data], rule.patterns, rule.rule_name, rule.severity, config, + rule.validations, rule.use_ml, rule.confidence) + # single pattern with multiple values means all the patterns must matched in target + if 1 < len(rule.patterns) and rule.rule_type in (RuleType.PATTERN, RuleType.KEYWORD): + # additional check whether all patterns match + if not cls._aux_scan(config, rule, target, candidate): + # cannot find secondary values for the candidate + continue + candidates.append(candidate) return candidates @classmethod diff --git a/tests/filters/test_value_base64_part_check.py b/tests/filters/test_value_base64_part_check.py new file mode 100644 index 000000000..790ca9008 --- /dev/null +++ b/tests/filters/test_value_base64_part_check.py @@ -0,0 +1,33 @@ +import re +import unittest + +from credsweeper.credentials import LineData +from credsweeper.filters import ValueBase64PartCheck +from tests.filters.conftest import DUMMY_ANALYSIS_TARGET + + +class TestValueBase64PartCheck(unittest.TestCase): + EAA_PATTERN = re.compile(r"(?P\bEAA[0-9A-Za-z]{32})") + + def test_value_check_n(self) -> None: + line_data = LineData(config=None, + path="dummy", + file_type="", + line="qcE81rS+FJHuvg39lz4T/EAACEb00Kse0BAlGy7KeQ5YnaCEd09Eo" + "se0cBAlGy7KeQ5Yna9CoDsup39tiYdoQ4jH9Coup39tiYdWoQ4jHFZD", + info="", + line_num=1, + line_pos=0, + pattern=TestValueBase64PartCheck.EAA_PATTERN) + self.assertTrue(ValueBase64PartCheck().run(line_data, DUMMY_ANALYSIS_TARGET)) + + def test_value_check_p(self) -> None: + line_data = LineData(config=None, + path="dummy", + file_type="", + line="http://meta.test/api/EAACRvAWiwzR8rcXFsLiUH13ybj0tdEa?x=login", + info="", + line_num=1, + line_pos=0, + pattern=TestValueBase64PartCheck.EAA_PATTERN) + self.assertFalse(ValueBase64PartCheck().run(line_data, DUMMY_ANALYSIS_TARGET)) diff --git a/tests/samples/test.html b/tests/samples/test.html index d07d0420f..65964bccb 100644 --- a/tests/samples/test.html +++ b/tests/samples/test.html @@ -114,7 +114,7 @@ nested table - + xml capability