diff --git a/.ci/benchmark.txt b/.ci/benchmark.txt index af170d263..e55836fac 100644 --- a/.ci/benchmark.txt +++ b/.ci/benchmark.txt @@ -1,5 +1,5 @@ -META MD5 d51d1f5107d0906adfd81b9fd6467597 -DATA MD5 5e46a76147ee32073b0d587f80684f86 +META MD5 30ecf5f4796a36b60ca12cb702152bab +DATA MD5 9ac09dae7d8873d53e1fbf18da2d71c4 DATA: 16329853 interested lines. MARKUP: 59549 items FileType FileNumber ValidLines Positives Negatives Templates --------------- ------------ ------------ ----------- ----------- ----------- @@ -55,7 +55,7 @@ FileType FileNumber ValidLines Positives Negatives Templat .erb 13 323 26 .erl 4 96 7 .ex 25 4968 5 98 5 -.example 17 1838 69 38 51 +.example 17 1838 74 36 51 .exs 24 4842 8 187 4 .ext 5 211 1 4 2 .fsproj 1 75 1 2 @@ -222,7 +222,7 @@ FileType FileNumber ValidLines Positives Negatives Templat .yml 418 36057 522 910 376 .zsh 6 872 12 .zsh-theme 1 97 1 -TOTAL: 10003 16329853 11851 46611 5084 +TOTAL: 10003 16329853 11856 46609 5084 credsweeper result_cnt : 0, lost_cnt : 0, true_cnt : 0, false_cnt : 0 Rules Positives Negatives Templates Reported TP FP TN FN FPR FNR ACC PRC RCL F1 ------------------------------ ----------- ----------- ----------- ---------- ---- ---- ----- ----- -------- -------- -------- ----- -------- ---- @@ -236,7 +236,7 @@ Azure Access Token 19 0 0 BASE64 Private Key 12 4 0 0 0 4 12 0.000000 1.000000 0.250000 0.000000 BASE64 encoded PEM Private Key 7 0 0 0 0 0 7 1.000000 0.000000 0.000000 Bitbucket Client ID 19 53 0 0 0 53 19 0.000000 1.000000 0.736111 0.000000 -Bitbucket Client Secret 27 66 1 0 0 67 27 0.000000 1.000000 0.712766 0.000000 +Bitbucket Client Secret 28 66 1 0 0 67 28 0.000000 1.000000 0.705263 0.000000 CMD ConvertTo-SecureString 13 4 0 0 0 4 13 0.000000 1.000000 0.235294 0.000000 CMD Password 21 128 6 0 0 134 21 0.000000 1.000000 0.864516 0.000000 CMD Secret 1 1 0 0 0 1 1 0.000000 1.000000 0.500000 0.000000 @@ -257,7 +257,7 @@ Grafana Provisioned API Key 22 1 0 JSON Web Token 170 61 0 0 0 61 170 0.000000 1.000000 0.264069 0.000000 Jira / Confluence PAT token 0 4 0 0 0 4 0 0.000000 1.000000 Jira 2FA 15 6 1 0 0 7 15 0.000000 1.000000 0.318182 0.000000 -Key 3909 15717 485 0 0 16202 3909 0.000000 1.000000 0.805629 0.000000 +Key 3911 15715 485 0 0 16200 3911 0.000000 1.000000 0.805529 0.000000 Nonce 93 49 0 0 0 49 93 0.000000 1.000000 0.345070 0.000000 Other 9 7447 5 0 0 7452 9 0.000000 1.000000 0.998794 0.000000 PEM Private Key 1019 1483 0 0 0 1483 1019 0.000000 1.000000 0.592726 0.000000 @@ -266,9 +266,10 @@ Salt 47 76 1 Secret 1297 1576 802 0 0 2378 1297 0.000000 1.000000 0.647075 0.000000 Seed 1 6 0 0 0 6 1 0.000000 1.000000 0.857143 0.000000 Slack Token 4 1 0 0 0 1 4 0.000000 1.000000 0.200000 0.000000 +Stripe Credentials 2 0 0 0 0 0 2 1.000000 0.000000 0.000000 Tencent WeChat API App ID 6 0 0 0 0 0 6 1.000000 0.000000 0.000000 Token 644 4170 454 0 0 4624 644 0.000000 1.000000 0.877752 0.000000 Twilio Credentials 30 39 0 0 0 39 30 0.000000 1.000000 0.565217 0.000000 URL Credentials 210 157 215 0 0 372 210 0.000000 1.000000 0.639175 0.000000 UUID 1075 265 0 0 0 265 1075 0.000000 1.000000 0.197761 0.000000 - 11851 46611 5084 0 0 0 46611 11851 0.000000 1.000000 0.797287 0.000000 + 11856 46609 5084 0 0 0 46609 11856 0.000000 1.000000 0.797212 0.000000 diff --git a/download_data.py b/download_data.py index c95852cd4..4da2fea94 100644 --- a/download_data.py +++ b/download_data.py @@ -319,6 +319,10 @@ def get_obfuscated_value(value, meta_row: MetaRow): obfuscated_value = '.'.join(obf_jwt) else: obfuscated_value = obfuscate_jwt(value) + elif any(value.startswith(x) for x in ["whsec_"]): + obfuscated_value = value[:6] + generate_value(value[6:]) + elif any(value.startswith(x) for x in ["pk_live_", "rk_live_", "sk_live_", "pk_test_", "rk_test_", "sk_test_"]): + obfuscated_value = value[:8] + generate_value(value[8:]) elif value.startswith("xox") and 15 <= len(value) and value[3] in "aboprst" and '-' == value[4]: obfuscated_value = value[:4] + generate_value(value[4:]) elif value.startswith("base64:"): diff --git a/meta/31423103.csv b/meta/31423103.csv index edcf6ec09..bcb3b4099 100644 --- a/meta/31423103.csv +++ b/meta/31423103.csv @@ -108,7 +108,7 @@ Id,FileID,Domain,RepoName,FilePath,LineStart,LineEnd,GroundTruth,WithWords,Value 133259,f63d3cf7,GitHub,31423103,data/31423103/src/f63d3cf7.js,759,759,F,F,,,F,F,,,,,0,0,F,F,F,Token 133260,f63d3cf7,GitHub,31423103,data/31423103/src/f63d3cf7.js,784,784,F,F,,,F,F,,,,,0,0,F,F,F,Token 133261,fa60852f,GitHub,31423103,data/31423103/src/fa60852f.js,688,688,F,F,,,F,F,,,,,0,0,F,F,F,Token -1023934,a3046da0,GitHub,31423103,data/31423103/test/a3046da0.example,46,46,F,F,,,F,F,,,,,0,0,F,F,F,Key -1338567,a3046da0,GitHub,31423103,data/31423103/test/a3046da0.example,47,47,F,F,12,44,F,F,,,,,0.0,0,F,F,F,Key +1023934,a3046da0,GitHub,31423103,data/31423103/test/a3046da0.example,46,46,T,F,12,44,F,F,,,,,0.0,0,F,F,F,Key:Stripe Credentials +1338567,a3046da0,GitHub,31423103,data/31423103/test/a3046da0.example,47,47,T,F,12,44,F,F,,,,,0.0,0,F,F,F,Key:Stripe Credentials:Bitbucket Client Secret 1339450,a3046da0,GitHub,31423103,data/31423103/test/a3046da0.example,20,20,T,F,12,48,F,F,,,,,0.0,0,F,F,F,UUID 1479653,a3046da0,GitHub,31423103,data/31423103/test/a3046da0.example,43,43,T,F,11,45,F,F,,,,,0.0,0,F,F,F,Twilio Credentials