diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 8865960af..187449c82 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -22,7 +22,8 @@ jobs: - name: Checkout CredData uses: actions/checkout@v4 with: - repository: Samsung/CredData + repository: babenek/CredData + ref: valsanitizer - name: Markup hashing run: | @@ -72,7 +73,8 @@ jobs: - name: Checkout CredData uses: actions/checkout@v4 with: - repository: Samsung/CredData + repository: babenek/CredData + ref: valsanitizer - name: Markup hashing run: | @@ -114,15 +116,10 @@ jobs: ref: ${{ github.event.pull_request.head.sha }} path: temp/CredSweeper - - name: Patch benchmark for PR work - run: | - sed -i 's|CREDSWEEPER = "https://github.com/Samsung/CredSweeper.git"|CREDSWEEPER = "dummy://github.com/Samsung/CredSweeper.git"|' benchmark/common/constants.py - grep --with-filename --line-number 'dummy://github.com/Samsung/CredSweeper.git' benchmark/common/constants.py - - name: Install CredSweeper run: | python -m pip install temp/CredSweeper - credsweeper_head= + python -m credsweeper --banner - name: Run CredSweeper tool run: | @@ -174,7 +171,8 @@ jobs: - name: Checkout CredData uses: actions/checkout@v4 with: - repository: Samsung/CredData + repository: babenek/CredData + ref: valsanitizer - name: Markup hashing run: | @@ -355,7 +353,8 @@ jobs: - name: Checkout CredData uses: actions/checkout@v4 with: - repository: Samsung/CredData + repository: babenek/CredData + ref: valsanitizer - name: Markup hashing run: | diff --git a/.github/workflows/check.yml b/.github/workflows/check.yml index 9270f66b6..0cfa1cf9b 100644 --- a/.github/workflows/check.yml +++ b/.github/workflows/check.yml @@ -187,9 +187,9 @@ jobs: file_crc32_int=$((16#${file_crc32_hex})) crc32_int=$(( ${crc32_int} ^ ${file_crc32_int} )) done - version_with_crc="$(credsweeper --version | head -1) crc32:$(printf '%x' ${crc32_int})" + version_with_crc="$(python -m credsweeper --version | head -1) crc32:$(printf '%x' ${crc32_int})" echo "version_with_crc = '${version_with_crc}'" - banner=$(credsweeper --banner --path requirements.txt | head -1) + banner=$(python -m credsweeper --banner | head -1) echo "banner = '${banner}'" if ! [ -n "${version_with_crc}" ] && [ -n "${banner}" ] && [ "${version_with_crc}" == "${banner}" ]; then echo "'${version_with_crc}' != '${banner}'" diff --git a/cicd/benchmark.txt b/cicd/benchmark.txt index 2664b91ac..455843228 100644 --- a/cicd/benchmark.txt +++ b/cicd/benchmark.txt @@ -84,7 +84,7 @@ FileType FileNumber ValidLines Positives Negatives Templat .jenkinsfile 1 58 1 7 .jinja2 1 64 2 .js 659 536413 541 2645 336 -.json 861 13670751 914 11012 143 +.json 861 13670751 917 11012 143 .jsp 13 3202 1 42 .jsx 7 857 19 .jwt 6 8 7 @@ -222,8 +222,8 @@ FileType FileNumber ValidLines Positives Negatives Templat .yml 418 36162 467 920 384 .zsh 6 872 12 .zsh-theme 1 97 1 -TOTAL: 10333 16988573 8373 60441 5233 -credsweeper result_cnt : 7795, lost_cnt : 0, true_cnt : 7226, false_cnt : 569 +TOTAL: 10333 16988573 8376 60441 5233 +credsweeper result_cnt : 7800, lost_cnt : 0, true_cnt : 7230, false_cnt : 570 Rules Positives Negatives Templates Reported TP FP TN FN FPR FNR ACC PRC RCL F1 ------------------------------ ----------- ----------- ----------- ---------- ---- ---- ----- ---- -------- -------- -------- -------- -------- -------- API 123 3163 185 112 109 3 3345 14 0.000896 0.113821 0.995102 0.973214 0.886179 0.927660 @@ -231,14 +231,14 @@ AWS Client ID 168 13 0 16 AWS Multi 75 12 0 87 75 11 1 0 0.916667 0.000000 0.873563 0.872093 1.000000 0.931677 AWS S3 Bucket 61 25 0 87 61 24 1 0 0.960000 0.000000 0.720930 0.717647 1.000000 0.835616 Atlassian Old PAT token 27 212 3 12 3 8 207 24 0.037209 0.888889 0.867769 0.272727 0.111111 0.157895 -Auth 406 2726 77 371 350 21 2782 56 0.007492 0.137931 0.976005 0.943396 0.862069 0.900901 +Auth 406 2726 77 372 350 22 2781 56 0.007849 0.137931 0.975693 0.940860 0.862069 0.899743 Azure Access Token 19 0 0 0 0 0 19 1.000000 0.000000 0.000000 BASE64 Private Key 7 2 0 7 7 0 2 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 BASE64 encoded PEM Private Key 7 0 0 5 5 0 0 2 0.285714 0.714286 1.000000 0.714286 0.833333 Bitbucket Client ID 142 1813 9 46 27 18 1804 115 0.009879 0.809859 0.932281 0.600000 0.190141 0.288770 Bitbucket Client Secret 230 535 10 44 33 11 534 197 0.020183 0.856522 0.731613 0.750000 0.143478 0.240876 Certificate 25 459 1 21 20 1 459 5 0.002174 0.200000 0.987629 0.952381 0.800000 0.869565 -Credential 91 155 74 87 84 3 226 7 0.013100 0.076923 0.968750 0.965517 0.923077 0.943820 +Credential 91 155 74 90 87 3 226 4 0.013100 0.043956 0.978125 0.966667 0.956044 0.961326 Docker Swarm Token 2 0 0 2 2 0 0 0 0.000000 1.000000 1.000000 1.000000 1.000000 Dropbox App secret 62 114 0 46 36 9 105 26 0.078947 0.419355 0.801136 0.800000 0.580645 0.672897 Facebook Access Token 0 1 0 0 0 1 0 0.000000 1.000000 @@ -255,11 +255,11 @@ IPv6 33 131 0 3 JSON Web Token 284 11 2 274 271 3 10 13 0.230769 0.045775 0.946128 0.989051 0.954225 0.971326 Jira / Confluence PAT token 0 4 0 0 0 4 0 0.000000 1.000000 Jira 2FA 14 6 0 10 10 0 6 4 0.000000 0.285714 0.800000 1.000000 0.714286 0.833333 -Key 483 8494 464 444 435 9 8949 48 0.001005 0.099379 0.993963 0.979730 0.900621 0.938511 +Key 483 8494 464 445 436 9 8949 47 0.001005 0.097308 0.994068 0.979775 0.902692 0.939655 Nonce 83 53 0 85 79 6 47 4 0.113208 0.048193 0.926471 0.929412 0.951807 0.940476 Other 0 0 5 0 0 5 0 0.000000 1.000000 PEM Private Key 1019 1483 0 1023 1019 4 1479 0 0.002697 0.000000 0.998401 0.996090 1.000000 0.998041 -Password 1820 7475 2752 1681 1614 67 10160 206 0.006551 0.113187 0.977339 0.960143 0.886813 0.922022 +Password 1823 7475 2752 1681 1614 67 10160 209 0.006551 0.114646 0.977095 0.960143 0.885354 0.921233 Salt 42 76 2 38 38 0 78 4 0.000000 0.095238 0.966667 1.000000 0.904762 0.950000 Secret 1358 28497 869 1234 1229 5 29361 129 0.000170 0.094993 0.995639 0.995948 0.905007 0.948302 Seed 1 6 0 0 0 6 1 0.000000 1.000000 0.857143 0.000000 @@ -267,4 +267,5 @@ Slack Token 4 1 0 Token 585 3972 439 519 511 8 4403 74 0.001814 0.126496 0.983587 0.984586 0.873504 0.925725 Twilio API Key 0 5 2 0 0 7 0 0.000000 1.000000 URL Credentials 194 125 251 184 184 0 376 10 0.000000 0.051546 0.982456 1.000000 0.948454 0.973545 - 8373 60441 5233 7937 7226 569 59872 1147 0.009414 0.136988 0.975063 0.927004 0.863012 0.893864 + 8376 60441 5233 7942 7230 570 59871 1146 0.009431 0.136819 0.975064 0.926923 0.863181 0.893917 + diff --git a/credsweeper/common/constants.py b/credsweeper/common/constants.py index 39166c68c..292fee633 100644 --- a/credsweeper/common/constants.py +++ b/credsweeper/common/constants.py @@ -5,7 +5,7 @@ class KeywordPattern: """Pattern set of keyword types""" - key_left = r"(?P(([`'\"]+[^:='\"`}<>\\/&?]*|[^:='\"`}<>\s()\\/&?]*)" \ + key_left = r"(\\[nrt])?(?P(([`'\"]+[^:='\"`}<>\\/&?]*|[^:='\"`}<>\s()\\/&?]*)" \ r"(?P" # there will be inserted a keyword key_right = r")" \ diff --git a/credsweeper/credentials/line_data.py b/credsweeper/credentials/line_data.py index 4b9ab6a42..42dcd4310 100644 --- a/credsweeper/credentials/line_data.py +++ b/credsweeper/credentials/line_data.py @@ -31,6 +31,7 @@ class LineData: quotation_marks = ('"', "'", '`') comment_starts = ("//", "* ", "#", "/*", "|\\w+?\\>|\\&)") + line_endings = re.compile(r"\\{1,8}[nr]") url_param_split = re.compile(r"(%|\\u(00){0,2})(26|3f)", flags=re.IGNORECASE) # some symbols e.g. double quotes cannot be in URL string https://www.ietf.org/rfc/rfc1738.txt # \ - was added for case of url in escaped string \u0026amp; - means escaped & in HTML @@ -180,6 +181,10 @@ def clean_bash_parameters(self) -> None: # and value can be split by bash special characters if len(value_spl) > 1: self.value = value_spl[0] + if ' ' not in self.value and ("\\n" in self.value or "\\r" in self.value): + value_whsp = self.line_endings.split(self.value) + if len(value_whsp) > 1: + self.value = value_whsp[0] def sanitize_variable(self) -> None: """Remove trailing spaces, dashes and quotations around the variable. Correct position.""" diff --git a/credsweeper/filters/value_atlassian_token_check.py b/credsweeper/filters/value_atlassian_token_check.py index 57c9ac688..f2239a208 100644 --- a/credsweeper/filters/value_atlassian_token_check.py +++ b/credsweeper/filters/value_atlassian_token_check.py @@ -32,8 +32,13 @@ def run(self, line_data: LineData, target: AnalysisTarget) -> bool: if value.startswith("BBDC-"): # Bitbucket HTTP Access Token return ValueAtlassianTokenCheck.check_atlassian_struct(value[5:]) - elif value.startswith("ATBB"): + elif value.startswith("AT"): # Bitbucket App password + while "\\=" in value or "%3d" in value or "%3D" in value: + # = sign may be escaped in URL https://www.rfc-editor.org/rfc/rfc3986 + value = value.replace('\\', '') + value = value.replace('%3d', '=') + value = value.replace('%3D', '=') return ValueAtlassianTokenCheck.check_crc32_struct(value) else: # Jira / Confluence PAT token @@ -43,9 +48,10 @@ def run(self, line_data: LineData, target: AnalysisTarget) -> bool: @staticmethod def check_crc32_struct(value: str) -> bool: """Returns False if value is valid for bitbucket app password structure 'payload:crc32'""" - crc32 = int(value[28:], 16) - data = value[:28].encode(ASCII) - if crc32 == binascii.crc32(data): + crc32 = int(value[-8:], 16) + data = value[:-8].encode(ASCII) + data_crc32 = binascii.crc32(data) + if crc32 == data_crc32: return False return True diff --git a/credsweeper/filters/value_file_path_check.py b/credsweeper/filters/value_file_path_check.py index 2a18b7188..61aa96b46 100644 --- a/credsweeper/filters/value_file_path_check.py +++ b/credsweeper/filters/value_file_path_check.py @@ -12,6 +12,9 @@ class ValueFilePathCheck(Filter): Check if a value contains either '/' or ':\' separators (but not both) and do not have any special characters ( !$@`&*()+) """ + base64_possible_set = set(Chars.BASE64_CHARS.value) | set(Chars.BASE64URL_CHARS.value) + unusual_windows_symbols_in_path = "\t\n\r !$@`&*()[]{}<>+=;,~" + unusual_linux_symbols_in_path = unusual_windows_symbols_in_path + ":\\" def __init__(self, config: Config = None) -> None: pass @@ -30,25 +33,32 @@ def run(self, line_data: LineData, target: AnalysisTarget) -> bool: value = line_data.value contains_unix_separator = '/' in value if contains_unix_separator: + if "://" in value or value.startswith("~/") or value.startswith("./") or "../" in value or "/.." in value: + # common case for url definition or aliases + return True # base64 encoded data might look like linux path min_entropy = ValueEntropyBase64Check.get_min_data_entropy(len(value)) # get minimal entropy to compare with shannon entropy of found value # min_entropy == 0 means that the value cannot be checked with the entropy due high variance - if 0 == min_entropy or min_entropy > Util.get_shannon_entropy(value, Chars.BASE64STD_CHARS.value): - for i in value: - if i not in Chars.BASE64STD_CHARS.value: - # value contains wrong BASE64STD_CHARS symbols - break - else: - # all symbols are from base64 alphabet - contains_unix_separator = 1 < value.count('/') + for i in value: + if i not in self.base64_possible_set: + # value contains wrong BASE64STD_CHARS symbols like . + break else: - # high entropy means base64 encoded data - contains_unix_separator = False + # all symbols are from base64 alphabet + entropy = Util.get_shannon_entropy(value, Chars.BASE64STD_CHARS.value) + if 0 == min_entropy or min_entropy > entropy: + contains_unix_separator = 1 < value.count('/') + else: + # high entropy means base64 encoded data + contains_unix_separator = False + # low shannon entropy points that the value maybe not a high randomized value in base64 contains_windows_separator = ':\\' in value if contains_unix_separator or contains_windows_separator: - for i in " !$@`&*()[]{}+=;,": + unusual_symbols_in_path = self.unusual_linux_symbols_in_path if contains_unix_separator \ + else self.unusual_windows_symbols_in_path + for i in unusual_symbols_in_path: if i in value: # the symbols which not passed in a path usually break diff --git a/credsweeper/ml_model/ml_validator.py b/credsweeper/ml_model/ml_validator.py index 501c3222b..743501f04 100644 --- a/credsweeper/ml_model/ml_validator.py +++ b/credsweeper/ml_model/ml_validator.py @@ -220,8 +220,9 @@ def validate_groups(self, group_list: List[Tuple[CandidateKey, List[Candidate]]] probability[head:tail] = self._batch_call_model(line_input_list, variable_input_list, value_input_list, features_list) is_cred = probability > self.threshold - for i in range(len(is_cred)): - logger.debug("ML decision: %s with prediction: %s for value: %s", is_cred[i], round(probability[i], 8), - group_list[i][0]) + if logger.isEnabledFor(logging.DEBUG): + for i in range(len(is_cred)): + logger.debug("ML decision: %s with prediction: %s for value: %s", is_cred[i], probability[i], + group_list[i][0]) # apply cast to float to avoid json export issue return is_cred, probability.astype(float) diff --git a/credsweeper/rules/config.yaml b/credsweeper/rules/config.yaml index 6c5abe2fb..df36dee3f 100644 --- a/credsweeper/rules/config.yaml +++ b/credsweeper/rules/config.yaml @@ -911,9 +911,10 @@ confidence: strong type: pattern values: - - (?ATCTT3xFfGN0[a-zA-Z0-9_-]{171}=[A-F0-9]{8})(?![=0-9A-Za-z_+-]) - filter_type: TokenPattern - min_line_len: 183 + - (?ATCTT3xFfGN0[a-zA-Z0-9_-]{80,800}(\\?=|%3[dD])[A-F0-9]{8})(?![=0-9A-Za-z_+-]) + filter_type: + - ValueAtlassianTokenCheck + min_line_len: 160 required_substrings: - ATCTT3xFfGN0 target: @@ -997,9 +998,10 @@ confidence: strong type: pattern values: - - (?ATATT3xFfGF0[a-zA-Z0-9_-]{171}=[A-F0-9]{8})(?![=0-9A-Za-z_+-]) - filter_type: TokenPattern - min_line_len: 191 + - (?ATATT3xFfGF0[a-zA-Z0-9_-]{80,800}(\\?=|%3[dD])[A-F0-9]{8})(?![=0-9A-Za-z_+-]) + filter_type: + - ValueAtlassianTokenCheck + min_line_len: 160 required_substrings: - ATATT3xFfGF0 target: diff --git a/requirements.txt b/requirements.txt index 4791f0864..aa0c17b31 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,7 +13,7 @@ PyYAML==6.0.1 python-docx==1.1.0 requests==2.32.0 typing_extensions==4.9.0 -whatthepatch==1.0.5 +whatthepatch==1.0.6 pdfminer.six==20231228 password-strength==0.0.3.post2 python-dateutil==2.8.2 diff --git a/tests/__init__.py b/tests/__init__.py index 8c657c9bc..3dfc0a7b5 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -7,14 +7,14 @@ NEGLIGIBLE_ML_THRESHOLD = 0.0001 # credentials count after scan -SAMPLES_CRED_COUNT: int = 425 -SAMPLES_CRED_LINE_COUNT: int = 442 +SAMPLES_CRED_COUNT: int = 429 +SAMPLES_CRED_LINE_COUNT: int = 446 # credentials count after post-processing -SAMPLES_POST_CRED_COUNT: int = 383 +SAMPLES_POST_CRED_COUNT: int = 387 # with option --doc -SAMPLES_IN_DOC = 407 +SAMPLES_IN_DOC = 410 # archived credentials that are not found without --depth SAMPLES_IN_DEEP_1 = SAMPLES_POST_CRED_COUNT + 25 diff --git a/tests/data/depth_3.json b/tests/data/depth_3.json index b94ef2521..b0f34278b 100644 --- a/tests/data/depth_3.json +++ b/tests/data/depth_3.json @@ -184,11 +184,11 @@ "confidence": "strong", "line_data_list": [ { - "line": "ATLASSIAN = \"ATATT3xFfGF0vNZ7oy6ON4KrWEzKEu96n-r2bCLOOcdOADizvJqPd89yKyP853uTZSflBi1Lk1zD460BqyMTa08VUEy8-oOzr1esg3j_mZKZwAELnRkCDTJZXhHsv5jaWHyAj4LRL-6h8LI-5MrYs4l7xXSaY8odQxDKJB4hDvdkmuh61xea2jE=2F2DE974\"", + "line": "ATLASSIAN = \"ATATT3xFfGF0vNZ7oy6ON4KrWEzKEu96n-r2bCLOOcdOADizvJqPd89yKyP853uTZSflBi1Lk1zD460BqyMTa08VUEy8-oOzr1esg3j_mZKZwAELnRkCDTJZXhHsv5jaWHyAj4LRL-6h8LI-5MrYs4l7xXSaY8odQxDKJB4hDvdkmuh61xea2jE=00203E68\"", "line_num": 1, "path": "tests/samples/atlassian_pat", "info": "tests/samples/atlassian_pat|RAW", - "value": "ATATT3xFfGF0vNZ7oy6ON4KrWEzKEu96n-r2bCLOOcdOADizvJqPd89yKyP853uTZSflBi1Lk1zD460BqyMTa08VUEy8-oOzr1esg3j_mZKZwAELnRkCDTJZXhHsv5jaWHyAj4LRL-6h8LI-5MrYs4l7xXSaY8odQxDKJB4hDvdkmuh61xea2jE=2F2DE974", + "value": "ATATT3xFfGF0vNZ7oy6ON4KrWEzKEu96n-r2bCLOOcdOADizvJqPd89yKyP853uTZSflBi1Lk1zD460BqyMTa08VUEy8-oOzr1esg3j_mZKZwAELnRkCDTJZXhHsv5jaWHyAj4LRL-6h8LI-5MrYs4l7xXSaY8odQxDKJB4hDvdkmuh61xea2jE=00203E68", "value_start": 13, "value_end": 205, "variable": null, @@ -196,7 +196,88 @@ "variable_end": -2, "entropy_validation": { "iterator": "BASE64_CHARS", - "entropy": 5.628712032325118, + "entropy": 5.614483907763351, + "valid": true + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "Atlassian PAT token", + "severity": "high", + "confidence": "strong", + "line_data_list": [ + { + "line": "escaped_backslash = ATATT3xFfGF0vNZ7oy6ON4KrWEzKEu96n-r2bCLOOcdOADizvJqPd89yKyP853uTZSflBi1Lk1zD460BqyMTa08VUEy8-oOzr1esg3j_mZKZwAELnRkCDTJZXhHsv5jaWHyAj4LRL-6h8LI-5MrYs4l7xXSaY8odQxDKJB4hDvdkmuh61xea2jE\\=00203E68", + "line_num": 2, + "path": "tests/samples/atlassian_pat", + "info": "tests/samples/atlassian_pat|RAW", + "value": "ATATT3xFfGF0vNZ7oy6ON4KrWEzKEu96n-r2bCLOOcdOADizvJqPd89yKyP853uTZSflBi1Lk1zD460BqyMTa08VUEy8-oOzr1esg3j_mZKZwAELnRkCDTJZXhHsv5jaWHyAj4LRL-6h8LI-5MrYs4l7xXSaY8odQxDKJB4hDvdkmuh61xea2jE\\=00203E68", + "value_start": 20, + "value_end": 213, + "variable": null, + "variable_start": -2, + "variable_end": -2, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 5.592654863341127, + "valid": true + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "Atlassian PAT token", + "severity": "high", + "confidence": "strong", + "line_data_list": [ + { + "line": "url_escaped_capital = ATATT3xFfGF0vNZ7oy6ON4KrWEzKEu96n-r2bCLOOcdOADizvJqPd89yKyP853uTZSflBi1Lk1zD460BqyMTa08VUEy8-oOzr1esg3j_mZKZwAELnRkCDTJZXhHsv5jaWHyAj4LRL-6h8LI-5MrYs4l7xXSaY8odQxDKJB4hDvdkmuh61xea2jE%3D00203E68", + "line_num": 3, + "path": "tests/samples/atlassian_pat", + "info": "tests/samples/atlassian_pat|RAW", + "value": "ATATT3xFfGF0vNZ7oy6ON4KrWEzKEu96n-r2bCLOOcdOADizvJqPd89yKyP853uTZSflBi1Lk1zD460BqyMTa08VUEy8-oOzr1esg3j_mZKZwAELnRkCDTJZXhHsv5jaWHyAj4LRL-6h8LI-5MrYs4l7xXSaY8odQxDKJB4hDvdkmuh61xea2jE%3D00203E68", + "value_start": 22, + "value_end": 216, + "variable": null, + "variable_start": -2, + "variable_end": -2, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 5.571478154549278, + "valid": true + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "Atlassian PAT token", + "severity": "high", + "confidence": "strong", + "line_data_list": [ + { + "line": "url_escaped_lowercase = ATATT3xFfGF0vNZ7oy6ON4KrWEzKEu96n-r2bCLOOcdOADizvJqPd89yKyP853uTZSflBi1Lk1zD460BqyMTa08VUEy8-oOzr1esg3j_mZKZwAELnRkCDTJZXhHsv5jaWHyAj4LRL-6h8LI-5MrYs4l7xXSaY8odQxDKJB4hDvdkmuh61xea2jE%3d00203E68", + "line_num": 4, + "path": "tests/samples/atlassian_pat", + "info": "tests/samples/atlassian_pat|RAW", + "value": "ATATT3xFfGF0vNZ7oy6ON4KrWEzKEu96n-r2bCLOOcdOADizvJqPd89yKyP853uTZSflBi1Lk1zD460BqyMTa08VUEy8-oOzr1esg3j_mZKZwAELnRkCDTJZXhHsv5jaWHyAj4LRL-6h8LI-5MrYs4l7xXSaY8odQxDKJB4hDvdkmuh61xea2jE%3d00203E68", + "value_start": 24, + "value_end": 218, + "variable": null, + "variable_start": -2, + "variable_end": -2, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 5.572975546587697, "valid": true } } @@ -920,19 +1001,46 @@ "confidence": "strong", "line_data_list": [ { - "line": "repo_access = \"ATCTT3xFfGN0zXtbKHz2POF86xa-2aBiYC4o_T3-myk01bmFVluUIFtGm_VFQwLizp4o1FKw-AMZhtdA0NzizshnA8WzRdfgv6GeTyowCD101oqKbJ4nx9DFsar5YyUNkwO9maR9-00tQvfciyfOHtPKG6K1d76Ki3iFo7roGeyJu4j1jM3GwQ4=91D14AE7\"", + "line": "\"Bitbucket Repository Access Token\" : \"ATCTT3xFfGN0zXtbKHz2POF86xa-2aBiYC4o_T3-myk01bmFVluUIFtGm_VFQwLizp4o1FKw-AMZhtdA0NzizshnA8WzRdfgv6GeTyowCD101oqKbJ4nx9DFsar5YyUNkwO9maR9-00tQvfciyfOHtPKG6K1d76Ki3iFo7roGeyJu4j1jM3GwQ4=EDDE81AD\"", "line_num": 1, "path": "tests/samples/bitbucket_repository_access_token", "info": "tests/samples/bitbucket_repository_access_token|RAW", - "value": "ATCTT3xFfGN0zXtbKHz2POF86xa-2aBiYC4o_T3-myk01bmFVluUIFtGm_VFQwLizp4o1FKw-AMZhtdA0NzizshnA8WzRdfgv6GeTyowCD101oqKbJ4nx9DFsar5YyUNkwO9maR9-00tQvfciyfOHtPKG6K1d76Ki3iFo7roGeyJu4j1jM3GwQ4=91D14AE7", - "value_start": 15, - "value_end": 207, + "value": "ATCTT3xFfGN0zXtbKHz2POF86xa-2aBiYC4o_T3-myk01bmFVluUIFtGm_VFQwLizp4o1FKw-AMZhtdA0NzizshnA8WzRdfgv6GeTyowCD101oqKbJ4nx9DFsar5YyUNkwO9maR9-00tQvfciyfOHtPKG6K1d76Ki3iFo7roGeyJu4j1jM3GwQ4=EDDE81AD", + "value_start": 39, + "value_end": 231, "variable": null, "variable_start": -2, "variable_end": -2, "entropy_validation": { "iterator": "BASE64_CHARS", - "entropy": 5.559335469855166, + "entropy": 5.573080311527303, + "valid": true + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "VALIDATED_KEY", + "ml_probability": 0.966, + "rule": "Token", + "severity": "medium", + "confidence": "moderate", + "line_data_list": [ + { + "line": "\"Bitbucket Repository Access Token\" : \"ATCTT3xFfGN0zXtbKHz2POF86xa-2aBiYC4o_T3-myk01bmFVluUIFtGm_VFQwLizp4o1FKw-AMZhtdA0NzizshnA8WzRdfgv6GeTyowCD101oqKbJ4nx9DFsar5YyUNkwO9maR9-00tQvfciyfOHtPKG6K1d76Ki3iFo7roGeyJu4j1jM3GwQ4=EDDE81AD\"", + "line_num": 1, + "path": "tests/samples/bitbucket_repository_access_token", + "info": "tests/samples/bitbucket_repository_access_token|RAW", + "value": "ATCTT3xFfGN0zXtbKHz2POF86xa-2aBiYC4o_T3-myk01bmFVluUIFtGm_VFQwLizp4o1FKw-AMZhtdA0NzizshnA8WzRdfgv6GeTyowCD101oqKbJ4nx9DFsar5YyUNkwO9maR9-00tQvfciyfOHtPKG6K1d76Ki3iFo7roGeyJu4j1jM3GwQ4=EDDE81AD", + "value_start": 39, + "value_end": 231, + "variable": "Bitbucket Repository Access Token", + "variable_start": 1, + "variable_end": 34, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 5.573080311527303, "valid": true } } @@ -7610,7 +7718,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.78, + "ml_probability": 0.756, "rule": "Github Old Token", "severity": "high", "confidence": "moderate", @@ -7637,7 +7745,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.78, + "ml_probability": 0.756, "rule": "Token", "severity": "medium", "confidence": "moderate", @@ -7650,8 +7758,8 @@ "value": "gireogicracklecrackle1231567190113413981", "value_start": 15, "value_end": 55, - "variable": "ngit_token", - "variable_start": 1, + "variable": "git_token", + "variable_start": 2, "variable_end": 11, "entropy_validation": { "iterator": "BASE36_CHARS", diff --git a/tests/data/doc.json b/tests/data/doc.json index ab8b856c2..be2eab7a4 100644 --- a/tests/data/doc.json +++ b/tests/data/doc.json @@ -62,11 +62,11 @@ "confidence": "strong", "line_data_list": [ { - "line": "ATLASSIAN = \"ATATT3xFfGF0vNZ7oy6ON4KrWEzKEu96n-r2bCLOOcdOADizvJqPd89yKyP853uTZSflBi1Lk1zD460BqyMTa08VUEy8-oOzr1esg3j_mZKZwAELnRkCDTJZXhHsv5jaWHyAj4LRL-6h8LI-5MrYs4l7xXSaY8odQxDKJB4hDvdkmuh61xea2jE=2F2DE974\"", + "line": "ATLASSIAN = \"ATATT3xFfGF0vNZ7oy6ON4KrWEzKEu96n-r2bCLOOcdOADizvJqPd89yKyP853uTZSflBi1Lk1zD460BqyMTa08VUEy8-oOzr1esg3j_mZKZwAELnRkCDTJZXhHsv5jaWHyAj4LRL-6h8LI-5MrYs4l7xXSaY8odQxDKJB4hDvdkmuh61xea2jE=00203E68\"", "line_num": 1, "path": "tests/samples/atlassian_pat", "info": "tests/samples/atlassian_pat|RAW", - "value": "ATATT3xFfGF0vNZ7oy6ON4KrWEzKEu96n-r2bCLOOcdOADizvJqPd89yKyP853uTZSflBi1Lk1zD460BqyMTa08VUEy8-oOzr1esg3j_mZKZwAELnRkCDTJZXhHsv5jaWHyAj4LRL-6h8LI-5MrYs4l7xXSaY8odQxDKJB4hDvdkmuh61xea2jE=2F2DE974", + "value": "ATATT3xFfGF0vNZ7oy6ON4KrWEzKEu96n-r2bCLOOcdOADizvJqPd89yKyP853uTZSflBi1Lk1zD460BqyMTa08VUEy8-oOzr1esg3j_mZKZwAELnRkCDTJZXhHsv5jaWHyAj4LRL-6h8LI-5MrYs4l7xXSaY8odQxDKJB4hDvdkmuh61xea2jE=00203E68", "value_start": 13, "value_end": 205, "variable": null, @@ -74,7 +74,88 @@ "variable_end": -2, "entropy_validation": { "iterator": "BASE64_CHARS", - "entropy": 5.628712032325118, + "entropy": 5.614483907763351, + "valid": true + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "Atlassian PAT token", + "severity": "high", + "confidence": "strong", + "line_data_list": [ + { + "line": "escaped_backslash = ATATT3xFfGF0vNZ7oy6ON4KrWEzKEu96n-r2bCLOOcdOADizvJqPd89yKyP853uTZSflBi1Lk1zD460BqyMTa08VUEy8-oOzr1esg3j_mZKZwAELnRkCDTJZXhHsv5jaWHyAj4LRL-6h8LI-5MrYs4l7xXSaY8odQxDKJB4hDvdkmuh61xea2jE\\=00203E68", + "line_num": 2, + "path": "tests/samples/atlassian_pat", + "info": "tests/samples/atlassian_pat|RAW", + "value": "ATATT3xFfGF0vNZ7oy6ON4KrWEzKEu96n-r2bCLOOcdOADizvJqPd89yKyP853uTZSflBi1Lk1zD460BqyMTa08VUEy8-oOzr1esg3j_mZKZwAELnRkCDTJZXhHsv5jaWHyAj4LRL-6h8LI-5MrYs4l7xXSaY8odQxDKJB4hDvdkmuh61xea2jE\\=00203E68", + "value_start": 20, + "value_end": 213, + "variable": null, + "variable_start": -2, + "variable_end": -2, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 5.592654863341127, + "valid": true + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "Atlassian PAT token", + "severity": "high", + "confidence": "strong", + "line_data_list": [ + { + "line": "url_escaped_capital = ATATT3xFfGF0vNZ7oy6ON4KrWEzKEu96n-r2bCLOOcdOADizvJqPd89yKyP853uTZSflBi1Lk1zD460BqyMTa08VUEy8-oOzr1esg3j_mZKZwAELnRkCDTJZXhHsv5jaWHyAj4LRL-6h8LI-5MrYs4l7xXSaY8odQxDKJB4hDvdkmuh61xea2jE%3D00203E68", + "line_num": 3, + "path": "tests/samples/atlassian_pat", + "info": "tests/samples/atlassian_pat|RAW", + "value": "ATATT3xFfGF0vNZ7oy6ON4KrWEzKEu96n-r2bCLOOcdOADizvJqPd89yKyP853uTZSflBi1Lk1zD460BqyMTa08VUEy8-oOzr1esg3j_mZKZwAELnRkCDTJZXhHsv5jaWHyAj4LRL-6h8LI-5MrYs4l7xXSaY8odQxDKJB4hDvdkmuh61xea2jE%3D00203E68", + "value_start": 22, + "value_end": 216, + "variable": null, + "variable_start": -2, + "variable_end": -2, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 5.571478154549278, + "valid": true + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "Atlassian PAT token", + "severity": "high", + "confidence": "strong", + "line_data_list": [ + { + "line": "url_escaped_lowercase = ATATT3xFfGF0vNZ7oy6ON4KrWEzKEu96n-r2bCLOOcdOADizvJqPd89yKyP853uTZSflBi1Lk1zD460BqyMTa08VUEy8-oOzr1esg3j_mZKZwAELnRkCDTJZXhHsv5jaWHyAj4LRL-6h8LI-5MrYs4l7xXSaY8odQxDKJB4hDvdkmuh61xea2jE%3d00203E68", + "line_num": 4, + "path": "tests/samples/atlassian_pat", + "info": "tests/samples/atlassian_pat|RAW", + "value": "ATATT3xFfGF0vNZ7oy6ON4KrWEzKEu96n-r2bCLOOcdOADizvJqPd89yKyP853uTZSflBi1Lk1zD460BqyMTa08VUEy8-oOzr1esg3j_mZKZwAELnRkCDTJZXhHsv5jaWHyAj4LRL-6h8LI-5MrYs4l7xXSaY8odQxDKJB4hDvdkmuh61xea2jE%3d00203E68", + "value_start": 24, + "value_end": 218, + "variable": null, + "variable_start": -2, + "variable_end": -2, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 5.572975546587697, "valid": true } } @@ -501,19 +582,19 @@ "confidence": "strong", "line_data_list": [ { - "line": "repo_access = \"ATCTT3xFfGN0zXtbKHz2POF86xa-2aBiYC4o_T3-myk01bmFVluUIFtGm_VFQwLizp4o1FKw-AMZhtdA0NzizshnA8WzRdfgv6GeTyowCD101oqKbJ4nx9DFsar5YyUNkwO9maR9-00tQvfciyfOHtPKG6K1d76Ki3iFo7roGeyJu4j1jM3GwQ4=91D14AE7\"", + "line": "\"Bitbucket Repository Access Token\" : \"ATCTT3xFfGN0zXtbKHz2POF86xa-2aBiYC4o_T3-myk01bmFVluUIFtGm_VFQwLizp4o1FKw-AMZhtdA0NzizshnA8WzRdfgv6GeTyowCD101oqKbJ4nx9DFsar5YyUNkwO9maR9-00tQvfciyfOHtPKG6K1d76Ki3iFo7roGeyJu4j1jM3GwQ4=EDDE81AD\"", "line_num": 1, "path": "tests/samples/bitbucket_repository_access_token", "info": "tests/samples/bitbucket_repository_access_token|RAW", - "value": "ATCTT3xFfGN0zXtbKHz2POF86xa-2aBiYC4o_T3-myk01bmFVluUIFtGm_VFQwLizp4o1FKw-AMZhtdA0NzizshnA8WzRdfgv6GeTyowCD101oqKbJ4nx9DFsar5YyUNkwO9maR9-00tQvfciyfOHtPKG6K1d76Ki3iFo7roGeyJu4j1jM3GwQ4=91D14AE7", - "value_start": 15, - "value_end": 207, + "value": "ATCTT3xFfGN0zXtbKHz2POF86xa-2aBiYC4o_T3-myk01bmFVluUIFtGm_VFQwLizp4o1FKw-AMZhtdA0NzizshnA8WzRdfgv6GeTyowCD101oqKbJ4nx9DFsar5YyUNkwO9maR9-00tQvfciyfOHtPKG6K1d76Ki3iFo7roGeyJu4j1jM3GwQ4=EDDE81AD", + "value_start": 39, + "value_end": 231, "variable": null, "variable_start": -2, "variable_end": -2, "entropy_validation": { "iterator": "BASE64_CHARS", - "entropy": 5.559335469855166, + "entropy": 5.573080311527303, "valid": true } } diff --git a/tests/data/ml_threshold.json b/tests/data/ml_threshold.json index 0a74fb0f2..0287a0ae5 100644 --- a/tests/data/ml_threshold.json +++ b/tests/data/ml_threshold.json @@ -89,11 +89,11 @@ "confidence": "strong", "line_data_list": [ { - "line": "ATLASSIAN = \"ATATT3xFfGF0vNZ7oy6ON4KrWEzKEu96n-r2bCLOOcdOADizvJqPd89yKyP853uTZSflBi1Lk1zD460BqyMTa08VUEy8-oOzr1esg3j_mZKZwAELnRkCDTJZXhHsv5jaWHyAj4LRL-6h8LI-5MrYs4l7xXSaY8odQxDKJB4hDvdkmuh61xea2jE=2F2DE974\"", + "line": "ATLASSIAN = \"ATATT3xFfGF0vNZ7oy6ON4KrWEzKEu96n-r2bCLOOcdOADizvJqPd89yKyP853uTZSflBi1Lk1zD460BqyMTa08VUEy8-oOzr1esg3j_mZKZwAELnRkCDTJZXhHsv5jaWHyAj4LRL-6h8LI-5MrYs4l7xXSaY8odQxDKJB4hDvdkmuh61xea2jE=00203E68\"", "line_num": 1, "path": "tests/samples/atlassian_pat", "info": "", - "value": "ATATT3xFfGF0vNZ7oy6ON4KrWEzKEu96n-r2bCLOOcdOADizvJqPd89yKyP853uTZSflBi1Lk1zD460BqyMTa08VUEy8-oOzr1esg3j_mZKZwAELnRkCDTJZXhHsv5jaWHyAj4LRL-6h8LI-5MrYs4l7xXSaY8odQxDKJB4hDvdkmuh61xea2jE=2F2DE974", + "value": "ATATT3xFfGF0vNZ7oy6ON4KrWEzKEu96n-r2bCLOOcdOADizvJqPd89yKyP853uTZSflBi1Lk1zD460BqyMTa08VUEy8-oOzr1esg3j_mZKZwAELnRkCDTJZXhHsv5jaWHyAj4LRL-6h8LI-5MrYs4l7xXSaY8odQxDKJB4hDvdkmuh61xea2jE=00203E68", "value_start": 13, "value_end": 205, "variable": null, @@ -101,7 +101,88 @@ "variable_end": -2, "entropy_validation": { "iterator": "BASE64_CHARS", - "entropy": 5.628712032325118, + "entropy": 5.614483907763351, + "valid": true + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "Atlassian PAT token", + "severity": "high", + "confidence": "strong", + "line_data_list": [ + { + "line": "escaped_backslash = ATATT3xFfGF0vNZ7oy6ON4KrWEzKEu96n-r2bCLOOcdOADizvJqPd89yKyP853uTZSflBi1Lk1zD460BqyMTa08VUEy8-oOzr1esg3j_mZKZwAELnRkCDTJZXhHsv5jaWHyAj4LRL-6h8LI-5MrYs4l7xXSaY8odQxDKJB4hDvdkmuh61xea2jE\\=00203E68", + "line_num": 2, + "path": "tests/samples/atlassian_pat", + "info": "", + "value": "ATATT3xFfGF0vNZ7oy6ON4KrWEzKEu96n-r2bCLOOcdOADizvJqPd89yKyP853uTZSflBi1Lk1zD460BqyMTa08VUEy8-oOzr1esg3j_mZKZwAELnRkCDTJZXhHsv5jaWHyAj4LRL-6h8LI-5MrYs4l7xXSaY8odQxDKJB4hDvdkmuh61xea2jE\\=00203E68", + "value_start": 20, + "value_end": 213, + "variable": null, + "variable_start": -2, + "variable_end": -2, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 5.592654863341127, + "valid": true + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "Atlassian PAT token", + "severity": "high", + "confidence": "strong", + "line_data_list": [ + { + "line": "url_escaped_capital = ATATT3xFfGF0vNZ7oy6ON4KrWEzKEu96n-r2bCLOOcdOADizvJqPd89yKyP853uTZSflBi1Lk1zD460BqyMTa08VUEy8-oOzr1esg3j_mZKZwAELnRkCDTJZXhHsv5jaWHyAj4LRL-6h8LI-5MrYs4l7xXSaY8odQxDKJB4hDvdkmuh61xea2jE%3D00203E68", + "line_num": 3, + "path": "tests/samples/atlassian_pat", + "info": "", + "value": "ATATT3xFfGF0vNZ7oy6ON4KrWEzKEu96n-r2bCLOOcdOADizvJqPd89yKyP853uTZSflBi1Lk1zD460BqyMTa08VUEy8-oOzr1esg3j_mZKZwAELnRkCDTJZXhHsv5jaWHyAj4LRL-6h8LI-5MrYs4l7xXSaY8odQxDKJB4hDvdkmuh61xea2jE%3D00203E68", + "value_start": 22, + "value_end": 216, + "variable": null, + "variable_start": -2, + "variable_end": -2, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 5.571478154549278, + "valid": true + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "Atlassian PAT token", + "severity": "high", + "confidence": "strong", + "line_data_list": [ + { + "line": "url_escaped_lowercase = ATATT3xFfGF0vNZ7oy6ON4KrWEzKEu96n-r2bCLOOcdOADizvJqPd89yKyP853uTZSflBi1Lk1zD460BqyMTa08VUEy8-oOzr1esg3j_mZKZwAELnRkCDTJZXhHsv5jaWHyAj4LRL-6h8LI-5MrYs4l7xXSaY8odQxDKJB4hDvdkmuh61xea2jE%3d00203E68", + "line_num": 4, + "path": "tests/samples/atlassian_pat", + "info": "", + "value": "ATATT3xFfGF0vNZ7oy6ON4KrWEzKEu96n-r2bCLOOcdOADizvJqPd89yKyP853uTZSflBi1Lk1zD460BqyMTa08VUEy8-oOzr1esg3j_mZKZwAELnRkCDTJZXhHsv5jaWHyAj4LRL-6h8LI-5MrYs4l7xXSaY8odQxDKJB4hDvdkmuh61xea2jE%3d00203E68", + "value_start": 24, + "value_end": 218, + "variable": null, + "variable_start": -2, + "variable_end": -2, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 5.572975546587697, "valid": true } } @@ -852,19 +933,46 @@ "confidence": "strong", "line_data_list": [ { - "line": "repo_access = \"ATCTT3xFfGN0zXtbKHz2POF86xa-2aBiYC4o_T3-myk01bmFVluUIFtGm_VFQwLizp4o1FKw-AMZhtdA0NzizshnA8WzRdfgv6GeTyowCD101oqKbJ4nx9DFsar5YyUNkwO9maR9-00tQvfciyfOHtPKG6K1d76Ki3iFo7roGeyJu4j1jM3GwQ4=91D14AE7\"", + "line": "\"Bitbucket Repository Access Token\" : \"ATCTT3xFfGN0zXtbKHz2POF86xa-2aBiYC4o_T3-myk01bmFVluUIFtGm_VFQwLizp4o1FKw-AMZhtdA0NzizshnA8WzRdfgv6GeTyowCD101oqKbJ4nx9DFsar5YyUNkwO9maR9-00tQvfciyfOHtPKG6K1d76Ki3iFo7roGeyJu4j1jM3GwQ4=EDDE81AD\"", "line_num": 1, "path": "tests/samples/bitbucket_repository_access_token", "info": "", - "value": "ATCTT3xFfGN0zXtbKHz2POF86xa-2aBiYC4o_T3-myk01bmFVluUIFtGm_VFQwLizp4o1FKw-AMZhtdA0NzizshnA8WzRdfgv6GeTyowCD101oqKbJ4nx9DFsar5YyUNkwO9maR9-00tQvfciyfOHtPKG6K1d76Ki3iFo7roGeyJu4j1jM3GwQ4=91D14AE7", - "value_start": 15, - "value_end": 207, + "value": "ATCTT3xFfGN0zXtbKHz2POF86xa-2aBiYC4o_T3-myk01bmFVluUIFtGm_VFQwLizp4o1FKw-AMZhtdA0NzizshnA8WzRdfgv6GeTyowCD101oqKbJ4nx9DFsar5YyUNkwO9maR9-00tQvfciyfOHtPKG6K1d76Ki3iFo7roGeyJu4j1jM3GwQ4=EDDE81AD", + "value_start": 39, + "value_end": 231, "variable": null, "variable_start": -2, "variable_end": -2, "entropy_validation": { "iterator": "BASE64_CHARS", - "entropy": 5.559335469855166, + "entropy": 5.573080311527303, + "valid": true + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "VALIDATED_KEY", + "ml_probability": 0.966, + "rule": "Token", + "severity": "medium", + "confidence": "moderate", + "line_data_list": [ + { + "line": "\"Bitbucket Repository Access Token\" : \"ATCTT3xFfGN0zXtbKHz2POF86xa-2aBiYC4o_T3-myk01bmFVluUIFtGm_VFQwLizp4o1FKw-AMZhtdA0NzizshnA8WzRdfgv6GeTyowCD101oqKbJ4nx9DFsar5YyUNkwO9maR9-00tQvfciyfOHtPKG6K1d76Ki3iFo7roGeyJu4j1jM3GwQ4=EDDE81AD\"", + "line_num": 1, + "path": "tests/samples/bitbucket_repository_access_token", + "info": "", + "value": "ATCTT3xFfGN0zXtbKHz2POF86xa-2aBiYC4o_T3-myk01bmFVluUIFtGm_VFQwLizp4o1FKw-AMZhtdA0NzizshnA8WzRdfgv6GeTyowCD101oqKbJ4nx9DFsar5YyUNkwO9maR9-00tQvfciyfOHtPKG6K1d76Ki3iFo7roGeyJu4j1jM3GwQ4=EDDE81AD", + "value_start": 39, + "value_end": 231, + "variable": "Bitbucket Repository Access Token", + "variable_start": 1, + "variable_end": 34, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 5.573080311527303, "valid": true } } diff --git a/tests/data/output.json b/tests/data/output.json index 3da19895c..b9d388ae9 100644 --- a/tests/data/output.json +++ b/tests/data/output.json @@ -89,11 +89,11 @@ "confidence": "strong", "line_data_list": [ { - "line": "ATLASSIAN = \"ATATT3xFfGF0vNZ7oy6ON4KrWEzKEu96n-r2bCLOOcdOADizvJqPd89yKyP853uTZSflBi1Lk1zD460BqyMTa08VUEy8-oOzr1esg3j_mZKZwAELnRkCDTJZXhHsv5jaWHyAj4LRL-6h8LI-5MrYs4l7xXSaY8odQxDKJB4hDvdkmuh61xea2jE=2F2DE974\"", + "line": "ATLASSIAN = \"ATATT3xFfGF0vNZ7oy6ON4KrWEzKEu96n-r2bCLOOcdOADizvJqPd89yKyP853uTZSflBi1Lk1zD460BqyMTa08VUEy8-oOzr1esg3j_mZKZwAELnRkCDTJZXhHsv5jaWHyAj4LRL-6h8LI-5MrYs4l7xXSaY8odQxDKJB4hDvdkmuh61xea2jE=00203E68\"", "line_num": 1, "path": "tests/samples/atlassian_pat", "info": "", - "value": "ATATT3xFfGF0vNZ7oy6ON4KrWEzKEu96n-r2bCLOOcdOADizvJqPd89yKyP853uTZSflBi1Lk1zD460BqyMTa08VUEy8-oOzr1esg3j_mZKZwAELnRkCDTJZXhHsv5jaWHyAj4LRL-6h8LI-5MrYs4l7xXSaY8odQxDKJB4hDvdkmuh61xea2jE=2F2DE974", + "value": "ATATT3xFfGF0vNZ7oy6ON4KrWEzKEu96n-r2bCLOOcdOADizvJqPd89yKyP853uTZSflBi1Lk1zD460BqyMTa08VUEy8-oOzr1esg3j_mZKZwAELnRkCDTJZXhHsv5jaWHyAj4LRL-6h8LI-5MrYs4l7xXSaY8odQxDKJB4hDvdkmuh61xea2jE=00203E68", "value_start": 13, "value_end": 205, "variable": null, @@ -101,7 +101,88 @@ "variable_end": -2, "entropy_validation": { "iterator": "BASE64_CHARS", - "entropy": 5.628712032325118, + "entropy": 5.614483907763351, + "valid": true + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "Atlassian PAT token", + "severity": "high", + "confidence": "strong", + "line_data_list": [ + { + "line": "escaped_backslash = ATATT3xFfGF0vNZ7oy6ON4KrWEzKEu96n-r2bCLOOcdOADizvJqPd89yKyP853uTZSflBi1Lk1zD460BqyMTa08VUEy8-oOzr1esg3j_mZKZwAELnRkCDTJZXhHsv5jaWHyAj4LRL-6h8LI-5MrYs4l7xXSaY8odQxDKJB4hDvdkmuh61xea2jE\\=00203E68", + "line_num": 2, + "path": "tests/samples/atlassian_pat", + "info": "", + "value": "ATATT3xFfGF0vNZ7oy6ON4KrWEzKEu96n-r2bCLOOcdOADizvJqPd89yKyP853uTZSflBi1Lk1zD460BqyMTa08VUEy8-oOzr1esg3j_mZKZwAELnRkCDTJZXhHsv5jaWHyAj4LRL-6h8LI-5MrYs4l7xXSaY8odQxDKJB4hDvdkmuh61xea2jE\\=00203E68", + "value_start": 20, + "value_end": 213, + "variable": null, + "variable_start": -2, + "variable_end": -2, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 5.592654863341127, + "valid": true + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "Atlassian PAT token", + "severity": "high", + "confidence": "strong", + "line_data_list": [ + { + "line": "url_escaped_capital = ATATT3xFfGF0vNZ7oy6ON4KrWEzKEu96n-r2bCLOOcdOADizvJqPd89yKyP853uTZSflBi1Lk1zD460BqyMTa08VUEy8-oOzr1esg3j_mZKZwAELnRkCDTJZXhHsv5jaWHyAj4LRL-6h8LI-5MrYs4l7xXSaY8odQxDKJB4hDvdkmuh61xea2jE%3D00203E68", + "line_num": 3, + "path": "tests/samples/atlassian_pat", + "info": "", + "value": "ATATT3xFfGF0vNZ7oy6ON4KrWEzKEu96n-r2bCLOOcdOADizvJqPd89yKyP853uTZSflBi1Lk1zD460BqyMTa08VUEy8-oOzr1esg3j_mZKZwAELnRkCDTJZXhHsv5jaWHyAj4LRL-6h8LI-5MrYs4l7xXSaY8odQxDKJB4hDvdkmuh61xea2jE%3D00203E68", + "value_start": 22, + "value_end": 216, + "variable": null, + "variable_start": -2, + "variable_end": -2, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 5.571478154549278, + "valid": true + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "Atlassian PAT token", + "severity": "high", + "confidence": "strong", + "line_data_list": [ + { + "line": "url_escaped_lowercase = ATATT3xFfGF0vNZ7oy6ON4KrWEzKEu96n-r2bCLOOcdOADizvJqPd89yKyP853uTZSflBi1Lk1zD460BqyMTa08VUEy8-oOzr1esg3j_mZKZwAELnRkCDTJZXhHsv5jaWHyAj4LRL-6h8LI-5MrYs4l7xXSaY8odQxDKJB4hDvdkmuh61xea2jE%3d00203E68", + "line_num": 4, + "path": "tests/samples/atlassian_pat", + "info": "", + "value": "ATATT3xFfGF0vNZ7oy6ON4KrWEzKEu96n-r2bCLOOcdOADizvJqPd89yKyP853uTZSflBi1Lk1zD460BqyMTa08VUEy8-oOzr1esg3j_mZKZwAELnRkCDTJZXhHsv5jaWHyAj4LRL-6h8LI-5MrYs4l7xXSaY8odQxDKJB4hDvdkmuh61xea2jE%3d00203E68", + "value_start": 24, + "value_end": 218, + "variable": null, + "variable_start": -2, + "variable_end": -2, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 5.572975546587697, "valid": true } } @@ -798,19 +879,46 @@ "confidence": "strong", "line_data_list": [ { - "line": "repo_access = \"ATCTT3xFfGN0zXtbKHz2POF86xa-2aBiYC4o_T3-myk01bmFVluUIFtGm_VFQwLizp4o1FKw-AMZhtdA0NzizshnA8WzRdfgv6GeTyowCD101oqKbJ4nx9DFsar5YyUNkwO9maR9-00tQvfciyfOHtPKG6K1d76Ki3iFo7roGeyJu4j1jM3GwQ4=91D14AE7\"", + "line": "\"Bitbucket Repository Access Token\" : \"ATCTT3xFfGN0zXtbKHz2POF86xa-2aBiYC4o_T3-myk01bmFVluUIFtGm_VFQwLizp4o1FKw-AMZhtdA0NzizshnA8WzRdfgv6GeTyowCD101oqKbJ4nx9DFsar5YyUNkwO9maR9-00tQvfciyfOHtPKG6K1d76Ki3iFo7roGeyJu4j1jM3GwQ4=EDDE81AD\"", "line_num": 1, "path": "tests/samples/bitbucket_repository_access_token", "info": "", - "value": "ATCTT3xFfGN0zXtbKHz2POF86xa-2aBiYC4o_T3-myk01bmFVluUIFtGm_VFQwLizp4o1FKw-AMZhtdA0NzizshnA8WzRdfgv6GeTyowCD101oqKbJ4nx9DFsar5YyUNkwO9maR9-00tQvfciyfOHtPKG6K1d76Ki3iFo7roGeyJu4j1jM3GwQ4=91D14AE7", - "value_start": 15, - "value_end": 207, + "value": "ATCTT3xFfGN0zXtbKHz2POF86xa-2aBiYC4o_T3-myk01bmFVluUIFtGm_VFQwLizp4o1FKw-AMZhtdA0NzizshnA8WzRdfgv6GeTyowCD101oqKbJ4nx9DFsar5YyUNkwO9maR9-00tQvfciyfOHtPKG6K1d76Ki3iFo7roGeyJu4j1jM3GwQ4=EDDE81AD", + "value_start": 39, + "value_end": 231, "variable": null, "variable_start": -2, "variable_end": -2, "entropy_validation": { "iterator": "BASE64_CHARS", - "entropy": 5.559335469855166, + "entropy": 5.573080311527303, + "valid": true + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "VALIDATED_KEY", + "ml_probability": 0.966, + "rule": "Token", + "severity": "medium", + "confidence": "moderate", + "line_data_list": [ + { + "line": "\"Bitbucket Repository Access Token\" : \"ATCTT3xFfGN0zXtbKHz2POF86xa-2aBiYC4o_T3-myk01bmFVluUIFtGm_VFQwLizp4o1FKw-AMZhtdA0NzizshnA8WzRdfgv6GeTyowCD101oqKbJ4nx9DFsar5YyUNkwO9maR9-00tQvfciyfOHtPKG6K1d76Ki3iFo7roGeyJu4j1jM3GwQ4=EDDE81AD\"", + "line_num": 1, + "path": "tests/samples/bitbucket_repository_access_token", + "info": "", + "value": "ATCTT3xFfGN0zXtbKHz2POF86xa-2aBiYC4o_T3-myk01bmFVluUIFtGm_VFQwLizp4o1FKw-AMZhtdA0NzizshnA8WzRdfgv6GeTyowCD101oqKbJ4nx9DFsar5YyUNkwO9maR9-00tQvfciyfOHtPKG6K1d76Ki3iFo7roGeyJu4j1jM3GwQ4=EDDE81AD", + "value_start": 39, + "value_end": 231, + "variable": "Bitbucket Repository Access Token", + "variable_start": 1, + "variable_end": 34, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 5.573080311527303, "valid": true } } diff --git a/tests/filters/test_value_file_path_check.py b/tests/filters/test_value_file_path_check.py index b2590c8d6..3a1697014 100644 --- a/tests/filters/test_value_file_path_check.py +++ b/tests/filters/test_value_file_path_check.py @@ -7,18 +7,24 @@ class TestValueFilePathCheck: - def test_value_file_path_check_p(self, file_path: pytest.fixture, success_line: pytest.fixture) -> None: - line_data = get_line_data(file_path, line=success_line, pattern=LINE_VALUE_PATTERN) + @pytest.mark.parametrize("line", [ + "5//0KCPafDhZvtCwqrsyiKFeDGT_0ZGHiI-E0ClIWrLC7tZ1WE5vHc4-Y2qi1IhPy3Pz5fmCe9OPIxEZUONUg7SWJF9nwQ_j2lIdXU0", + ]) + def test_value_file_path_check_p(self, file_path: pytest.fixture, line: str) -> None: + line_data = get_line_data(file_path, line=line, pattern=LINE_VALUE_PATTERN) assert ValueFilePathCheck().run(line_data, DUMMY_ANALYSIS_TARGET) is False @pytest.mark.parametrize( "line", [ + "crackle/filepath.txt", "/home/user/tmp", # simple path "../..", # path + "dir/..", # path + "../dir", # path "file:///Crackle/filepath/", # path from browser url "~/.custompass", # path with synonym - "crackle/filepath_txt", + "./sshpass.sh", # path with synonym "crackle/file.path", # "C:\\Crackle\\filepath", # ]) diff --git a/tests/samples/atlassian_pat b/tests/samples/atlassian_pat index 38a176e32..baa2596f2 100644 --- a/tests/samples/atlassian_pat +++ b/tests/samples/atlassian_pat @@ -1 +1,4 @@ -ATLASSIAN = "ATATT3xFfGF0vNZ7oy6ON4KrWEzKEu96n-r2bCLOOcdOADizvJqPd89yKyP853uTZSflBi1Lk1zD460BqyMTa08VUEy8-oOzr1esg3j_mZKZwAELnRkCDTJZXhHsv5jaWHyAj4LRL-6h8LI-5MrYs4l7xXSaY8odQxDKJB4hDvdkmuh61xea2jE=2F2DE974" +ATLASSIAN = "ATATT3xFfGF0vNZ7oy6ON4KrWEzKEu96n-r2bCLOOcdOADizvJqPd89yKyP853uTZSflBi1Lk1zD460BqyMTa08VUEy8-oOzr1esg3j_mZKZwAELnRkCDTJZXhHsv5jaWHyAj4LRL-6h8LI-5MrYs4l7xXSaY8odQxDKJB4hDvdkmuh61xea2jE=00203E68" +escaped_backslash = ATATT3xFfGF0vNZ7oy6ON4KrWEzKEu96n-r2bCLOOcdOADizvJqPd89yKyP853uTZSflBi1Lk1zD460BqyMTa08VUEy8-oOzr1esg3j_mZKZwAELnRkCDTJZXhHsv5jaWHyAj4LRL-6h8LI-5MrYs4l7xXSaY8odQxDKJB4hDvdkmuh61xea2jE\=00203E68 +url_escaped_capital = ATATT3xFfGF0vNZ7oy6ON4KrWEzKEu96n-r2bCLOOcdOADizvJqPd89yKyP853uTZSflBi1Lk1zD460BqyMTa08VUEy8-oOzr1esg3j_mZKZwAELnRkCDTJZXhHsv5jaWHyAj4LRL-6h8LI-5MrYs4l7xXSaY8odQxDKJB4hDvdkmuh61xea2jE%3D00203E68 +url_escaped_lowercase = ATATT3xFfGF0vNZ7oy6ON4KrWEzKEu96n-r2bCLOOcdOADizvJqPd89yKyP853uTZSflBi1Lk1zD460BqyMTa08VUEy8-oOzr1esg3j_mZKZwAELnRkCDTJZXhHsv5jaWHyAj4LRL-6h8LI-5MrYs4l7xXSaY8odQxDKJB4hDvdkmuh61xea2jE%3d00203E68 diff --git a/tests/samples/bitbucket_repository_access_token b/tests/samples/bitbucket_repository_access_token index 13d8ceeae..34a456771 100644 --- a/tests/samples/bitbucket_repository_access_token +++ b/tests/samples/bitbucket_repository_access_token @@ -1 +1 @@ -repo_access = "ATCTT3xFfGN0zXtbKHz2POF86xa-2aBiYC4o_T3-myk01bmFVluUIFtGm_VFQwLizp4o1FKw-AMZhtdA0NzizshnA8WzRdfgv6GeTyowCD101oqKbJ4nx9DFsar5YyUNkwO9maR9-00tQvfciyfOHtPKG6K1d76Ki3iFo7roGeyJu4j1jM3GwQ4=91D14AE7" \ No newline at end of file +"Bitbucket Repository Access Token" : "ATCTT3xFfGN0zXtbKHz2POF86xa-2aBiYC4o_T3-myk01bmFVluUIFtGm_VFQwLizp4o1FKw-AMZhtdA0NzizshnA8WzRdfgv6GeTyowCD101oqKbJ4nx9DFsar5YyUNkwO9maR9-00tQvfciyfOHtPKG6K1d76Ki3iFo7roGeyJu4j1jM3GwQ4=EDDE81AD" \ No newline at end of file diff --git a/tests/test_main.py b/tests/test_main.py index 8e390ade6..96265b323 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -1,5 +1,4 @@ import io -import io import os import random import shutil @@ -796,6 +795,13 @@ def test_param_n(self) -> None: def test_param_p(self) -> None: # internal parametrized tests for quick debug items = [ # + ("slt.py", b'\\t\\tsalt = "\\x187bhgerjhqw\\n iKa\\tW_R~0/8"', "salt", "\\x187bhgerjhqw\\n iKa\\tW_R~0/8"), + ("log.txt", + b'json\\nAuthorization: Basic jfhlksadjiu9813ryiuhdfskadjlkjh34\\n\\u003c/code\\u003e\\u003c/pre\\u003e"', + "Authorization", "jfhlksadjiu9813ryiuhdfskadjlkjh34"), + ("pwd.py", b'password = "ji3_8iKgaW_R~0/8"', "password", "ji3_8iKgaW_R~0/8"), + ("pwd.py", b'password = "/_tcTz