Skip to content

Commit

Permalink
Merge branch 'auxiliary' into uuid
Browse files Browse the repository at this point in the history
  • Loading branch information
babenek committed Aug 7, 2024
2 parents c686b69 + d7f65c1 commit 59e9c21
Show file tree
Hide file tree
Showing 29 changed files with 2,306 additions and 6,809 deletions.
26 changes: 18 additions & 8 deletions .github/workflows/benchmark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -437,20 +437,30 @@ jobs:

run_doc_benchmark:
runs-on: ubuntu-latest
if: ${{ 'Samsung/CredSweeper' == github.event.pull_request.head.repo.full_name }}
# Run on pushes, or on pull requests whose head branch lives in Samsung/CredSweeper.
# The whole condition must be ONE expression joined with `||`: any text outside ${{ }}
# (such as " or ") turns the value into a non-empty string, which is always truthy.
if: ${{ 'push' == github.event_name || 'Samsung/CredSweeper' == github.event.pull_request.head.repo.full_name }}
steps:
- name: Checkout CredSweeper
- name: Checkout CredSweeper PR
if: ${{ 'pull_request' == github.event_name }}
uses: actions/checkout@v4
with:
ref: ${{ github.event.pull_request.head.sha }}

- name: Checkout CredSweeper HEAD
  if: ${{ 'push' == github.event_name }}
  uses: actions/checkout@v4
  with:
    # The push event payload has no `head` field, so `github.event.head` expands to an
    # empty string; `github.sha` is the commit that triggered the workflow run.
    ref: ${{ github.sha }}

- name: Send cURL request with the commit SHA
if: ${{ 'pull_request' == github.event_name }}
run: |
COMMIT_SHA=$(git rev-parse HEAD)
curl -X POST ${{ secrets.SLACK_URL }} \
--data-urlencode \
"payload={'text':'[BMT Request] ${{ github.event.repository.html_url }}/commit/${COMMIT_SHA}'}"
if [[ "${{ secrets.SLACK_URL }}" =~ http.*/.*/.* ]]; then
COMMIT_SHA=$(git rev-parse HEAD)
echo ${COMMIT_SHA}
curl -X POST ${{ secrets.SLACK_URL }} \
--data-urlencode \
"payload={'text':'[BMT Request] ${{ github.event.repository.html_url }}/commit/${COMMIT_SHA}'}"
else
echo "secrets.SLACK_URL is not available"
fi
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
189 changes: 95 additions & 94 deletions cicd/benchmark.txt

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion credsweeper/filters/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,12 @@
from credsweeper.filters.value_allowlist_check import ValueAllowlistCheck
from credsweeper.filters.value_array_dictionary_check import ValueArrayDictionaryCheck
from credsweeper.filters.value_atlassian_token_check import ValueAtlassianTokenCheck
from credsweeper.filters.value_azure_token_check import ValueAzureTokenCheck
from credsweeper.filters.value_base32_data_check import ValueBase32DataCheck
from credsweeper.filters.value_base64_data_check import ValueBase64DataCheck
from credsweeper.filters.value_base64_encoded_pem_check import ValueBase64EncodedPem
from credsweeper.filters.value_base64_key_check import ValueBase64KeyCheck
from credsweeper.filters.value_base64_part_check import ValueBase64PartCheck
from credsweeper.filters.value_blocklist_check import ValueBlocklistCheck
from credsweeper.filters.value_camel_case_check import ValueCamelCaseCheck
from credsweeper.filters.value_couple_keyword_check import ValueCoupleKeywordCheck
Expand All @@ -24,7 +26,6 @@
from credsweeper.filters.value_grafana_check import ValueGrafanaCheck
from credsweeper.filters.value_grafana_service_check import ValueGrafanaServiceCheck
from credsweeper.filters.value_hex_number_check import ValueHexNumberCheck
from credsweeper.filters.value_ip_check import ValueIPCheck
from credsweeper.filters.value_jfrog_token_check import ValueJfrogTokenCheck
from credsweeper.filters.value_json_web_token_check import ValueJsonWebTokenCheck
from credsweeper.filters.value_last_word_check import ValueLastWordCheck
Expand Down
2 changes: 2 additions & 0 deletions credsweeper/filters/group/general_pattern.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from credsweeper.common.constants import GroupType
from credsweeper.config import Config
from credsweeper.filters import ValueUselessWordCheck
from credsweeper.filters.group import Group


Expand All @@ -8,3 +9,4 @@ class GeneralPattern(Group):

def __init__(self, config: Config) -> None:
    """Initialise the group as a PATTERN group and add the useless-word value filter.

    Args:
        config: configuration object passed through to the base group

    """
    super().__init__(config, GroupType.PATTERN)
    # the pattern group carries one extra filter on top of whatever the base group sets up
    useless_word_filter = ValueUselessWordCheck()
    self.filters.extend([useless_word_filter])
52 changes: 52 additions & 0 deletions credsweeper/filters/value_azure_token_check.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import contextlib
import json

from credsweeper.common.constants import Chars
from credsweeper.config import Config
from credsweeper.credentials import LineData
from credsweeper.file_handler.analysis_target import AnalysisTarget
from credsweeper.filters import Filter
from credsweeper.filters.value_entropy_base64_check import ValueEntropyBase64Check
from credsweeper.utils import Util


class ValueAzureTokenCheck(Filter):
    """Filter for Azure tokens, which consist of a header, a payload and a signature.

    https://learn.microsoft.com/en-us/azure/active-directory-b2c/access-tokens
    """

    def __init__(self, config: Config = None) -> None:
        pass

    def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
        """Decide whether a dot-separated, possibly structured token must be dropped.

        Args:
            line_data: credential candidate data
            target: multiline target from which line data was obtained

        Return:
            True when the candidate has to be filtered out, False when it is kept

        """
        # any decoding or parsing failure falls through to the final "filter it" result
        with contextlib.suppress(Exception):
            segments = line_data.value.split('.')
            if len(segments) != 3:
                return True
            raw_header, raw_payload, signature = segments

            header = json.loads(Util.decode_base64(raw_header, padding_safe=True, urlsafe_detect=True))
            # every one of these members has to be present in the header
            if any(key not in header for key in ("alg", "typ", "kid")):
                return True

            payload = json.loads(Util.decode_base64(raw_payload, padding_safe=True, urlsafe_detect=True))
            # every one of these members has to be present in the payload
            if any(key not in payload for key in ("iss", "exp", "iat")):
                return True

            # a genuine signature is expected to look like random bytes
            threshold = ValueEntropyBase64Check.get_min_data_entropy(len(signature))
            signature_entropy = Util.get_shannon_entropy(signature, Chars.BASE64URL_CHARS.value)
            return signature_entropy < threshold

        return True
56 changes: 56 additions & 0 deletions credsweeper/filters/value_base64_part_check.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import contextlib
import statistics

from credsweeper.common.constants import Chars
from credsweeper.config import Config
from credsweeper.credentials import LineData
from credsweeper.file_handler.analysis_target import AnalysisTarget
from credsweeper.filters import Filter
from credsweeper.utils import Util


class ValueBase64PartCheck(Filter):
    """Filter that checks the candidate is NOT a fragment of a long base64 line."""

    def __init__(self, config: Config = None) -> None:
        pass

    def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
        """Compare the entropy of the value with the entropy of the text around it.

        Args:
            line_data: credential candidate data
            target: multiline target from which line data was obtained

        Return:
            True when the candidate has to be filtered out, False when it is kept

        """
        # any failure while inspecting the line keeps the candidate (falls through to False)
        with contextlib.suppress(Exception):
            start = line_data.value_start
            # only a value directly preceded by '/' is examined
            if not start or line_data.line[start - 1] != '/':
                return False

            value = line_data.value
            if '-' in value or '_' in value:
                # the value contains url-safe chars, so '/' is a delimiter
                return False

            line = line_data.line
            end = line_data.value_end
            span = len(value)
            alphabet = Chars.BASE64STD_CHARS.value

            value_entropy = Util.get_shannon_entropy(value, alphabet)
            # neighbouring windows are as long as the value, clamped to the line bounds
            left_entropy = Util.get_shannon_entropy(line[max(0, start - span):start], alphabet)
            right_entropy = Util.get_shannon_entropy(line[end:min(len(line), end + span)], alphabet)

            entropies = [value_entropy, left_entropy, right_entropy]
            mean = statistics.mean(entropies)
            lower_bound = mean - statistics.stdev(entropies, mean)
            if lower_bound < left_entropy and lower_bound < right_entropy:
                # high entropy of bound parts looks like a part of base64 long line
                return True

        return False
51 changes: 0 additions & 51 deletions credsweeper/filters/value_ip_check.py

This file was deleted.

47 changes: 37 additions & 10 deletions credsweeper/filters/value_json_web_token_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,19 @@
class ValueJsonWebTokenCheck(Filter):
"""
Check that candidate is JWT which starts usually from 'eyJ'
only header is parsed with "typ" or "alg" member from example of RFC7519
https://datatracker.ietf.org/doc/html/rfc7519
registered keys are checked to be in the JWT parts
https://www.iana.org/assignments/jose/jose.xhtml
"""
header_keys = {
"alg", "jku", "jwk", "kid", "x5u", "x5c", "x5t", "x5t#S256", "typ", "cty", "crit", "alg", "enc", "zip", "jku",
"jwk", "kid", "x5u", "x5c", "x5t", "x5t#S256", "typ", "cty", "crit", "epk", "apu", "apv", "iv", "tag", "p2s",
"p2c", "iss", "sub", "aud", "b64", "ppt", "url", "nonce", "svt"
}
payload_keys = {
"iss", "sub", "aud", "exp", "nbf", "iat", "jti", "kty", "use", "key_ops", "alg", "enc", "zip", "jku", "jwk",
"kid", "x5u", "x5c", "x5t", "x5t#S256", "crv", "x", "y", "d", "n", "e", "d", "p", "q", "dp", "dq", "qi", "oth",
"k", "crv", "d", "x", "ext", "crit", "keys", "id", "role", "token", "secret", "password", "nonce"
}

def __init__(self, config: Config = None) -> None:
pass
Expand All @@ -29,12 +39,29 @@ def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
True, when need to filter candidate and False if left
"""
header_check = False
payload_check = False
signature_check = False
with contextlib.suppress(Exception):
delimiter_pos = line_data.value.find(".")
# jwt token. '.' must be always in given data, according regex in rule
value = line_data.value[:delimiter_pos]
decoded = Util.decode_base64(value, padding_safe=True, urlsafe_detect=True)
if header := json.loads(decoded):
if "alg" in header or "typ" in header:
return False
return True
jwt_parts = line_data.value.split('.')
for part in jwt_parts:
data = Util.decode_base64(part, padding_safe=True, urlsafe_detect=True)
if part.startswith("eyJ"):
# open part - just base64 encoded
json_keys = json.loads(data).keys()
# header will be checked first
if not header_check:
header_check = bool(ValueJsonWebTokenCheck.header_keys.intersection(json_keys))
# payload follows the header
if not payload_check:
payload_check = bool(ValueJsonWebTokenCheck.payload_keys.intersection(json_keys))
# any other payloads are allowed
elif header_check and payload_check and not signature_check:
# signature check or skip encrypted part
signature_check = not Util.is_ascii_entropy_validate(data)
else:
break
if header_check and payload_check and signature_check:
return False
else:
return True
60 changes: 25 additions & 35 deletions credsweeper/rules/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -126,34 +126,6 @@
target:
- code

- name: IPv4
severity: info
confidence: weak
type: pattern
values:
- (?<![.0-9a-zA-Z])(?P<value>[0-2]?[0-9]{1,2}\.[0-2]?[0-9]{1,2}\.[0-2]?[0-9]{1,2}\.[0-2]?[0-9]{1,2})(?![.0-9a-zA-Z])
filter_type:
- ValueIPCheck
min_line_len: 10
required_substrings:
- "."
target:
- code

- name: IPv6
severity: info
confidence: strong
type: pattern
values:
- (?<![:0-9A-Za-z])(?P<value>[0-9A-Fa-f]{0,4}:(:?[0-9A-Fa-f]{1,4}:?){0,6}:[0-9A-Fa-f]{1,4})(?![:0-9A-Za-z])
filter_type:
- ValueIPCheck
min_line_len: 10
required_substrings:
- ":"
target:
- code

- name: UUID
severity: info
confidence: strong
Expand Down Expand Up @@ -250,7 +222,9 @@
type: pattern
values:
- (?<![0-9A-Za-z_+-])(?P<value>EAA[0-9A-Za-z]{80,800})
filter_type: GeneralPattern
filter_type:
- ValuePatternCheck
- ValueBase64PartCheck
required_substrings:
- EAA
min_line_len: 80
Expand Down Expand Up @@ -381,17 +355,18 @@

- name: JSON Web Token
severity: medium
confidence: moderate
confidence: strong
type: pattern
values:
- (?<![.0-9A-Za-z_+-])(?P<value>eyJ[0-9A-Za-z_=-]{15,8000}([.0-9A-Za-z_=-]{1,8000})?)
filter_type: GeneralPattern
use_ml: true
- (?<![0-9A-Za-z_+-])(?P<value>eyJ[0-9A-Za-z_+/=-]{15,8000}(\.[0-9A-Za-z_+/=-]{0,8000}){2,16})
filter_type:
- ValueJsonWebTokenCheck
required_substrings:
- eyJ
min_line_len: 18
target:
- code
- doc

- name: MailChimp API Key
severity: high
Expand Down Expand Up @@ -884,9 +859,9 @@
confidence: strong
type: pattern
values:
- (?<![0-9A-Za-z_+-])(?P<value>eyJ[A-Za-z0-9_=-]{50,500}\.eyJ[A-Za-z0-9_=-]{1,8000}\.[A-Za-z0-9_=-]{1,8000})
- (?<![0-9A-Za-z_+-])(?P<value>eyJ[A-Za-z0-9_=-]{50,500}\.eyJ[A-Za-z0-9_=-]{8,8000}\.[A-Za-z0-9_=-]{18,800})
filter_type:
- ValueJsonWebTokenCheck
- ValueAzureTokenCheck
required_substrings:
- eyJ
min_line_len: 148
Expand Down Expand Up @@ -1207,6 +1182,21 @@
- code
- doc

# Terraform token pattern: 14 url-safe chars, the literal ".atlasv1." marker, then 67 url-safe chars.
- name: Hashicorp Terraform Token
  severity: high
  confidence: strong
  type: pattern
  values:
    - (?<![.0-9A-Za-z_/+-])(?P<value>[0-9A-Za-z_-]{14}\.atlasv1\.[0-9A-Za-z_-]{67})(?![=0-9A-Za-z_/+-])
  filter_type:
    - ValuePatternCheck
  min_line_len: 90
  # key spelled `required_substrings` (plural) to match the other rules in this file
  required_substrings:
    - .atlasv1.
  target:
    - code
    - doc

- name: Jira 2FA
severity: info
confidence: weak
Expand Down
Loading

0 comments on commit 59e9c21

Please sign in to comment.