Skip to content

Commit

Permalink
Keyword improvement with "new" (#596)
Browse files Browse the repository at this point in the history
* [skip actions] [auxiliary] 2024-08-15T08:09:06+03:00

* add _new_ for wrap case

* BM scores
  • Loading branch information
babenek authored Aug 15, 2024
1 parent 5e2bf59 commit 355a5d5
Show file tree
Hide file tree
Showing 4 changed files with 31 additions and 28 deletions.
50 changes: 25 additions & 25 deletions cicd/benchmark.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
META MD5 6e26189b51be42fd388d39547ee97d9b
DATA MD5 4d496bb06291caeab62a6eadaeeddf83
DATA: 16345157 interested lines. MARKUP: 62633 items
META MD5 59c815aa130ca7e038434f9ccb941d7c
DATA MD5 9d2273d7f0bb4aee72a768908cc5f4f6
DATA: 16345157 interested lines. MARKUP: 62634 items
FileType FileNumber ValidLines Positives Negatives Templates
--------------- ------------ ------------ ----------- ----------- -----------
194 28318 66 427 87
Expand Down Expand Up @@ -29,7 +29,7 @@ FileType FileNumber ValidLines Positives Negatives Templat
.cmd 4 401 2 3
.cnf 8 858 18 45 18
.coffee 1 585 2
.conf 60 4945 54 71 53
.conf 60 4945 53 72 53
.config 20 492 16 33 1
.cpp 15 5688 2 61
.creds 1 10 1 1
Expand Down Expand Up @@ -63,7 +63,7 @@ FileType FileNumber ValidLines Positives Negatives Templat
.gd 1 37 1
.gml 3 3075 26
.gni 3 5017 18
.go 1080 566476 694 4331 739
.go 1080 566476 689 4339 737
.golden 5 1168 1 14 29
.gradle 45 3265 4 91 100
.graphql 7 420 13
Expand All @@ -85,8 +85,8 @@ FileType FileNumber ValidLines Positives Negatives Templat
.java 621 134132 362 1359 170
.jenkinsfile 1 58 2 7
.jinja2 1 64 2
.js 659 536413 537 2636 330
.json 850 13046270 1074 10778 140
.js 659 536413 536 2638 330
.json 850 13046270 1074 10783 140
.jsp 13 3202 1 42
.jsx 7 857 19
.jwt 1 1 2
Expand All @@ -112,7 +112,7 @@ FileType FileNumber ValidLines Positives Negatives Templat
.markdown 3 139 3 1
.markerb 3 12 3
.marko 1 21 2
.md 674 149399 723 2372 661
.md 674 149399 719 2402 635
.mdx 3 549 7
.mjml 1 18 1
.mjs 22 4424 78 343
Expand Down Expand Up @@ -141,7 +141,7 @@ FileType FileNumber ValidLines Positives Negatives Templat
.pod 9 1859 2 24
.pony 1 83 4
.postinst 2 354 4 16
.pp 10 563 23
.pp 10 563 21
.ppk 1 45 37
.private 1 15 1
.proj 1 85 3
Expand All @@ -152,7 +152,7 @@ FileType FileNumber ValidLines Positives Negatives Templat
.pug 2 193 2
.purs 1 69 4
.pxd 1 150 5 2
.py 890 291553 682 3462 729
.py 890 291553 680 3463 729
.pyi 4 1361 9
.pyp 1 167 1
.pyx 2 1094 23
Expand All @@ -171,7 +171,7 @@ FileType FileNumber ValidLines Positives Negatives Templat
.rs 31 9855 2 238 11
.rsc 1 691 1
.rsp 16 7101 19 10 28
.rst 86 33980 69 358 68
.rst 86 33980 70 357 68
.rules 1 6 2
.sample 2 25 3 4 4
.sbt 3 570 6 2
Expand Down Expand Up @@ -219,26 +219,26 @@ FileType FileNumber ValidLines Positives Negatives Templat
.xml 9 689 9
.xsl 1 311 1
.yaml 137 19004 128 356 44
.yml 418 36162 549 912 384
.yml 418 36162 549 913 384
.zsh 6 872 12
.zsh-theme 1 97 1
TOTAL: 10259 16345157 8766 59713 5179
credsweeper result_cnt : 7735, lost_cnt : 0, true_cnt : 7513, false_cnt : 222
TOTAL: 10259 16345157 8754 59758 5151
credsweeper result_cnt : 7751, lost_cnt : 0, true_cnt : 7529, false_cnt : 222
Rules Positives Negatives Templates Reported TP FP TN FN FPR FNR ACC PRC RCL F1
------------------------------ ----------- ----------- ----------- ---------- ---- ---- ----- ---- -------- -------- -------- -------- -------- --------
API 128 3130 185 113 111 2 3313 17 0.000603 0.132812 0.994482 0.982301 0.867188 0.921162
API 128 3131 185 113 111 2 3314 17 0.000603 0.132812 0.994483 0.982301 0.867188 0.921162
AWS Client ID 167 18 0 160 160 0 18 7 0.000000 0.041916 0.962162 1.000000 0.958084 0.978593
AWS Multi 75 14 0 87 75 11 3 0 0.785714 0.000000 0.876404 0.872093 1.000000 0.931677
AWS S3 Bucket 66 24 0 91 65 24 0 1 1.000000 0.015152 0.722222 0.730337 0.984848 0.838710
Atlassian Old PAT token 27 208 3 12 3 8 203 24 0.037915 0.888889 0.865546 0.272727 0.111111 0.157895
Auth 412 2724 76 377 358 19 2781 54 0.006786 0.131068 0.977273 0.949602 0.868932 0.907478
Auth 412 2725 76 378 359 19 2782 53 0.006783 0.128641 0.977591 0.949735 0.871359 0.908861
Azure Access Token 19 0 0 12 12 0 0 7 0.368421 0.631579 1.000000 0.631579 0.774194
BASE64 Private Key 7 2 0 7 7 0 2 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000
BASE64 encoded PEM Private Key 7 0 0 5 5 0 0 2 0.285714 0.714286 1.000000 0.714286 0.833333
Bitbucket Client ID 142 1808 9 48 28 19 1798 114 0.010457 0.802817 0.932108 0.595745 0.197183 0.296296
Bitbucket Client Secret 230 527 10 40 29 11 526 201 0.020484 0.873913 0.723598 0.725000 0.126087 0.214815
Certificate 25 466 1 27 20 7 460 5 0.014989 0.200000 0.975610 0.740741 0.800000 0.769231
Credential 94 154 74 85 85 0 228 9 0.000000 0.095745 0.972050 1.000000 0.904255 0.949721
Certificate 25 467 1 27 20 7 461 5 0.014957 0.200000 0.975659 0.740741 0.800000 0.769231
Credential 94 154 74 92 92 0 228 2 0.000000 0.021277 0.993789 1.000000 0.978723 0.989247
Docker Swarm Token 2 0 0 1 1 0 0 1 0.500000 0.500000 1.000000 0.500000 0.666667
Dropbox App secret 64 114 0 46 35 10 104 29 0.087719 0.453125 0.780899 0.777778 0.546875 0.642202
Facebook Access Token 0 1 0 0 0 1 0 0.000000 1.000000
Expand All @@ -253,16 +253,16 @@ Grafana Provisioned API Key 22 1 0
JSON Web Token 170 61 0 131 131 0 61 39 0.000000 0.229412 0.831169 1.000000 0.770588 0.870432
Jira / Confluence PAT token 0 4 0 0 0 4 0 0.000000 1.000000
Jira 2FA 15 6 0 12 12 0 6 3 0.000000 0.200000 0.857143 1.000000 0.800000 0.888889
Key 539 8456 464 468 461 7 8913 78 0.000785 0.144712 0.991014 0.985043 0.855288 0.915591
Nonce 91 48 0 83 81 2 46 10 0.041667 0.109890 0.913669 0.975904 0.890110 0.931034
Key 537 8460 464 469 462 7 8917 75 0.000784 0.139665 0.991333 0.985075 0.860335 0.918489
Nonce 91 49 0 83 81 2 47 10 0.040816 0.109890 0.914286 0.975904 0.890110 0.931034
PEM Private Key 1019 1483 0 1023 1019 4 1479 0 0.002697 0.000000 0.998401 0.996090 1.000000 0.998041
Password 1843 7479 2722 1721 1652 69 10132 191 0.006764 0.103635 0.978412 0.959907 0.896365 0.927048
Password 1844 7504 2709 1726 1657 69 10144 187 0.006756 0.101410 0.978768 0.960023 0.898590 0.928291
Salt 45 74 2 42 41 1 75 4 0.013158 0.088889 0.958678 0.976190 0.911111 0.942529
Secret 1368 28360 868 1235 1229 6 29222 139 0.000205 0.101608 0.995261 0.995142 0.898392 0.944295
Secret 1357 28357 867 1236 1230 6 29218 127 0.000205 0.093589 0.995651 0.995146 0.906411 0.948708
Seed 1 6 0 0 0 6 1 0.000000 1.000000 0.857143 0.000000
Slack Token 4 1 0 4 4 0 1 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000
Token 648 3952 437 539 533 6 4383 115 0.001367 0.177469 0.975978 0.988868 0.822531 0.898062
Token 648 3953 437 540 534 6 4384 114 0.001367 0.175926 0.976181 0.988889 0.824074 0.898990
Twilio API Key 0 5 2 0 0 7 0 0.000000 1.000000
URL Credentials 209 129 239 196 196 0 368 13 0.000000 0.062201 0.977470 1.000000 0.937799 0.967901
URL Credentials 209 143 225 196 196 0 368 13 0.000000 0.062201 0.977470 1.000000 0.937799 0.967901
UUID 1069 1 0 1061 1060 1 0 9 1.000000 0.008419 0.990654 0.999057 0.991581 0.995305
8766 59713 5179 7742 7513 222 59491 1253 0.003718 0.142939 0.978461 0.971299 0.857061 0.910611
8754 59758 5151 7758 7529 222 59536 1225 0.003715 0.139936 0.978880 0.971359 0.860064 0.912330
2 changes: 1 addition & 1 deletion credsweeper/common/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ class KeywordPattern:
separator = r"\s*\]?\s*" \
r"(?P<separator>:( [a-z]{3,9}[?]? )?=" \
r"|:|=>|!=|===|==|=)" \
r"\s*(?P<wrap>(\w|\.|->|\(|\[)*[\[\(\{](\w{1,32}=)?\s*)?"
r"\s*(?P<wrap>((new\s*)?\w|\.|->|\(|\[)*[\[\(\{](\w{1,32}=)?\s*)?"
# Authentication scheme ( oauth | bot | basic | bearer | apikey | accesskey ) may precede the credential
value = r"(?P<value_leftquote>((b|r|br|rb|u|f|rf|fr|\\{0,8})?[`'\"]){1,4})?" \
r"( ?(oauth|bot|basic|bearer|apikey|accesskey) )?" \
Expand Down
4 changes: 2 additions & 2 deletions experiment/main.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ if [ 0 -ne ${error_code} ]; then exit ${error_code}; fi

cd ${CREDSWEEPER_DIR}
report_file=${RESULT_DIR}/${now}.json
${CREDSWEEPER_DIR}/.venv/bin/python -m credsweeper --sort --path ~/q/DataCred/abspos/data/ --log info --job $(nproc) --subtext --save-json ${report_file}
${CREDSWEEPER_DIR}/.venv/bin/python -m credsweeper --sort --path ~/q/DataCred/auxiliary/data/ --log info --job $(nproc) --subtext --save-json ${report_file}

cd ~/q/DataCred/abspos/
cd ~/q/DataCred/auxiliary/
.venv/bin/python -m benchmark --scanner credsweeper --load ${report_file} | tee ${report_file}.log
3 changes: 3 additions & 0 deletions tests/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -809,6 +809,9 @@ def test_param_n(self) -> None:
def test_param_p(self) -> None:
# internal parametrized tests for quick debug
items = [ #
('tk.java',
b' final OAuth2AccessToken accessToken = new OAuth2AccessToken("7c9yp7.y513e1t629w7e8f3n1z4m856a05o");',
"OAuth2AccessToken accessToken", "7c9yp7.y513e1t629w7e8f3n1z4m856a05o"),
('my.toml', b'{nkey: XMIGDHSYNSJQ0XNR}', "nkey", "XMIGDHSYNSJQ0XNR"),
('my.yaml', b'password: 3287#JQ0XX@IG}', "password", "3287#JQ0XX@IG}"),
("creds.py", b'"tokens": ["xabsjhdbasu7d9g", "ashbjhdifufhsds"]', "tokens", "xabsjhdbasu7d9g"),
Expand Down

0 comments on commit 355a5d5

Please sign in to comment.