diff --git a/cicd/benchmark.txt b/cicd/benchmark.txt index 91eaf87f3..4c80a3fb6 100644 --- a/cicd/benchmark.txt +++ b/cicd/benchmark.txt @@ -1,6 +1,6 @@ -META MD5 6e26189b51be42fd388d39547ee97d9b -DATA MD5 4d496bb06291caeab62a6eadaeeddf83 -DATA: 16345157 interested lines. MARKUP: 62633 items +META MD5 59c815aa130ca7e038434f9ccb941d7c +DATA MD5 9d2273d7f0bb4aee72a768908cc5f4f6 +DATA: 16345157 interested lines. MARKUP: 62634 items FileType FileNumber ValidLines Positives Negatives Templates --------------- ------------ ------------ ----------- ----------- ----------- 194 28318 66 427 87 @@ -29,7 +29,7 @@ FileType FileNumber ValidLines Positives Negatives Templat .cmd 4 401 2 3 .cnf 8 858 18 45 18 .coffee 1 585 2 -.conf 60 4945 54 71 53 +.conf 60 4945 53 72 53 .config 20 492 16 33 1 .cpp 15 5688 2 61 .creds 1 10 1 1 @@ -63,7 +63,7 @@ FileType FileNumber ValidLines Positives Negatives Templat .gd 1 37 1 .gml 3 3075 26 .gni 3 5017 18 -.go 1080 566476 694 4331 739 +.go 1080 566476 689 4339 737 .golden 5 1168 1 14 29 .gradle 45 3265 4 91 100 .graphql 7 420 13 @@ -85,8 +85,8 @@ FileType FileNumber ValidLines Positives Negatives Templat .java 621 134132 362 1359 170 .jenkinsfile 1 58 2 7 .jinja2 1 64 2 -.js 659 536413 537 2636 330 -.json 850 13046270 1074 10778 140 +.js 659 536413 536 2638 330 +.json 850 13046270 1074 10783 140 .jsp 13 3202 1 42 .jsx 7 857 19 .jwt 1 1 2 @@ -112,7 +112,7 @@ FileType FileNumber ValidLines Positives Negatives Templat .markdown 3 139 3 1 .markerb 3 12 3 .marko 1 21 2 -.md 674 149399 723 2372 661 +.md 674 149399 719 2402 635 .mdx 3 549 7 .mjml 1 18 1 .mjs 22 4424 78 343 @@ -141,7 +141,7 @@ FileType FileNumber ValidLines Positives Negatives Templat .pod 9 1859 2 24 .pony 1 83 4 .postinst 2 354 4 16 -.pp 10 563 23 +.pp 10 563 21 .ppk 1 45 37 .private 1 15 1 .proj 1 85 3 @@ -152,7 +152,7 @@ FileType FileNumber ValidLines Positives Negatives Templat .pug 2 193 2 .purs 1 69 4 .pxd 1 150 5 2 -.py 890 291553 682 3462 729 +.py 890 291553 680 3463 729 .pyi 4 1361 9 .pyp 1 167 1 .pyx 2 1094 23 @@ -171,7 +171,7 @@ FileType FileNumber ValidLines Positives Negatives Templat .rs 31 9855 2 238 11 .rsc 1 691 1 .rsp 16 7101 19 10 28 -.rst 86 33980 69 358 68 +.rst 86 33980 70 357 68 .rules 1 6 2 .sample 2 25 3 4 4 .sbt 3 570 6 2 @@ -219,26 +219,26 @@ FileType FileNumber ValidLines Positives Negatives Templat .xml 9 689 9 .xsl 1 311 1 .yaml 137 19004 128 356 44 -.yml 418 36162 549 912 384 +.yml 418 36162 549 913 384 .zsh 6 872 12 .zsh-theme 1 97 1 -TOTAL: 10259 16345157 8766 59713 5179 -credsweeper result_cnt : 7735, lost_cnt : 0, true_cnt : 7513, false_cnt : 222 +TOTAL: 10259 16345157 8754 59758 5151 +credsweeper result_cnt : 7751, lost_cnt : 0, true_cnt : 7529, false_cnt : 222 Rules Positives Negatives Templates Reported TP FP TN FN FPR FNR ACC PRC RCL F1 ------------------------------ ----------- ----------- ----------- ---------- ---- ---- ----- ---- -------- -------- -------- -------- -------- -------- -API 128 3130 185 113 111 2 3313 17 0.000603 0.132812 0.994482 0.982301 0.867188 0.921162 +API 128 3131 185 113 111 2 3314 17 0.000603 0.132812 0.994483 0.982301 0.867188 0.921162 AWS Client ID 167 18 0 160 160 0 18 7 0.000000 0.041916 0.962162 1.000000 0.958084 0.978593 AWS Multi 75 14 0 87 75 11 3 0 0.785714 0.000000 0.876404 0.872093 1.000000 0.931677 AWS S3 Bucket 66 24 0 91 65 24 0 1 1.000000 0.015152 0.722222 0.730337 0.984848 0.838710 Atlassian Old PAT token 27 208 3 12 3 8 203 24 0.037915 0.888889 0.865546 0.272727 0.111111 0.157895 -Auth 412 2724 76 377 358 19 2781 54 0.006786 0.131068 0.977273 0.949602 0.868932 0.907478 +Auth 412 2725 76 378 359 19 2782 53 0.006783 0.128641 0.977591 0.949735 0.871359 0.908861 Azure Access Token 19 0 0 12 12 0 0 7 0.368421 0.631579 1.000000 0.631579 0.774194 BASE64 Private Key 7 2 0 7 7 0 2 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 BASE64 encoded PEM Private Key 7 0 0 5 5 0 0 2 0.285714 0.714286 1.000000 0.714286 0.833333 Bitbucket Client ID 142 1808 9 48 28 19 1798 114 0.010457 0.802817 0.932108 0.595745 0.197183 0.296296 Bitbucket Client Secret 230 527 10 40 29 11 526 201 0.020484 0.873913 0.723598 0.725000 0.126087 0.214815 -Certificate 25 466 1 27 20 7 460 5 0.014989 0.200000 0.975610 0.740741 0.800000 0.769231 -Credential 94 154 74 85 85 0 228 9 0.000000 0.095745 0.972050 1.000000 0.904255 0.949721 +Certificate 25 467 1 27 20 7 461 5 0.014957 0.200000 0.975659 0.740741 0.800000 0.769231 +Credential 94 154 74 92 92 0 228 2 0.000000 0.021277 0.993789 1.000000 0.978723 0.989247 Docker Swarm Token 2 0 0 1 1 0 0 1 0.500000 0.500000 1.000000 0.500000 0.666667 Dropbox App secret 64 114 0 46 35 10 104 29 0.087719 0.453125 0.780899 0.777778 0.546875 0.642202 Facebook Access Token 0 1 0 0 0 1 0 0.000000 1.000000 @@ -253,16 +253,16 @@ Grafana Provisioned API Key 22 1 0 JSON Web Token 170 61 0 131 131 0 61 39 0.000000 0.229412 0.831169 1.000000 0.770588 0.870432 Jira / Confluence PAT token 0 4 0 0 0 4 0 0.000000 1.000000 Jira 2FA 15 6 0 12 12 0 6 3 0.000000 0.200000 0.857143 1.000000 0.800000 0.888889 -Key 539 8456 464 468 461 7 8913 78 0.000785 0.144712 0.991014 0.985043 0.855288 0.915591 -Nonce 91 48 0 83 81 2 46 10 0.041667 0.109890 0.913669 0.975904 0.890110 0.931034 +Key 537 8460 464 469 462 7 8917 75 0.000784 0.139665 0.991333 0.985075 0.860335 0.918489 +Nonce 91 49 0 83 81 2 47 10 0.040816 0.109890 0.914286 0.975904 0.890110 0.931034 PEM Private Key 1019 1483 0 1023 1019 4 1479 0 0.002697 0.000000 0.998401 0.996090 1.000000 0.998041 -Password 1843 7479 2722 1721 1652 69 10132 191 0.006764 0.103635 0.978412 0.959907 0.896365 0.927048 +Password 1844 7504 2709 1726 1657 69 10144 187 0.006756 0.101410 0.978768 0.960023 0.898590 0.928291 Salt 45 74 2 42 41 1 75 4 0.013158 0.088889 0.958678 0.976190 0.911111 0.942529 -Secret 1368 28360 868 1235 1229 6 29222 139 0.000205 0.101608 0.995261 0.995142 0.898392 0.944295 +Secret 1357 28357 867 1236 1230 6 29218 127 0.000205 0.093589 0.995651 0.995146 0.906411 0.948708 Seed 1 6 0 0 0 6 1 0.000000 1.000000 0.857143 0.000000 Slack Token 4 1 0 4 4 0 1 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 -Token 648 3952 437 539 533 6 4383 115 0.001367 0.177469 0.975978 0.988868 0.822531 0.898062 +Token 648 3953 437 540 534 6 4384 114 0.001367 0.175926 0.976181 0.988889 0.824074 0.898990 Twilio API Key 0 5 2 0 0 7 0 0.000000 1.000000 -URL Credentials 209 129 239 196 196 0 368 13 0.000000 0.062201 0.977470 1.000000 0.937799 0.967901 +URL Credentials 209 143 225 196 196 0 368 13 0.000000 0.062201 0.977470 1.000000 0.937799 0.967901 UUID 1069 1 0 1061 1060 1 0 9 1.000000 0.008419 0.990654 0.999057 0.991581 0.995305 - 8766 59713 5179 7742 7513 222 59491 1253 0.003718 0.142939 0.978461 0.971299 0.857061 0.910611 + 8754 59758 5151 7758 7529 222 59536 1225 0.003715 0.139936 0.978880 0.971359 0.860064 0.912330 diff --git a/credsweeper/common/constants.py b/credsweeper/common/constants.py index 1b1c7e684..807146623 100644 --- a/credsweeper/common/constants.py +++ b/credsweeper/common/constants.py @@ -14,7 +14,7 @@ class KeywordPattern: separator = r"\s*\]?\s*" \ r"(?P:( [a-z]{3,9}[?]? )?=" \ r"|:|=>|!=|===|==|=)" \ - r"\s*(?P(\w|\.|->|\(|\[)*[\[\(\{](\w{1,32}=)?\s*)?" + r"\s*(?P((new\s*)?\w|\.|->|\(|\[)*[\[\(\{](\w{1,32}=)?\s*)?" # Authentication scheme ( oauth | basic | bearer | apikey ) precedes to credential value = r"(?P((b|r|br|rb|u|f|rf|fr|\\{0,8})?[`'\"]){1,4})?" \ r"( ?(oauth|bot|basic|bearer|apikey|accesskey) )?" \ diff --git a/experiment/main.sh b/experiment/main.sh index 35279e6da..9b764a6d9 100755 --- a/experiment/main.sh +++ b/experiment/main.sh @@ -18,7 +18,7 @@ if [ 0 -ne ${error_code} ]; then exit ${error_code}; fi cd ${CREDSWEEPER_DIR} report_file=${RESULT_DIR}/${now}.json -${CREDSWEEPER_DIR}/.venv/bin/python -m credsweeper --sort --path ~/q/DataCred/abspos/data/ --log info --job $(nproc) --subtext --save-json ${report_file} +${CREDSWEEPER_DIR}/.venv/bin/python -m credsweeper --sort --path ~/q/DataCred/auxiliary/data/ --log info --job $(nproc) --subtext --save-json ${report_file} -cd ~/q/DataCred/abspos/ +cd ~/q/DataCred/auxiliary/ .venv/bin/python -m benchmark --scanner credsweeper --load ${report_file} | tee ${report_file}.log diff --git a/tests/test_main.py b/tests/test_main.py index c60c09b2a..3d2eb37d6 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -809,6 +809,9 @@ def test_param_n(self) -> None: def test_param_p(self) -> None: # internal parametrized tests for quick debug items = [ # + ('tk.java', + b' final OAuth2AccessToken accessToken = new OAuth2AccessToken("7c9yp7.y513e1t629w7e8f3n1z4m856a05o");', + "OAuth2AccessToken accessToken", "7c9yp7.y513e1t629w7e8f3n1z4m856a05o"), ('my.toml', b'{nkey: XMIGDHSYNSJQ0XNR}', "nkey", "XMIGDHSYNSJQ0XNR"), ('my.yaml', b'password: 3287#JQ0XX@IG}', "password", "3287#JQ0XX@IG}"), ("creds.py", b'"tokens": ["xabsjhdbasu7d9g", "ashbjhdifufhsds"]', "tokens", "xabsjhdbasu7d9g"),