diff --git a/.ci/benchmark.txt b/.ci/benchmark.txt index a437f50af..7f76e8369 100644 --- a/.ci/benchmark.txt +++ b/.ci/benchmark.txt @@ -1,6 +1,6 @@ -META MD5 f019321883fa9315afcd43fa085b5bf9 -DATA MD5 de85ea0a77bd333be6a0d8422b835df4 -DATA: 16344639 interested lines. MARKUP: 62823 items +META MD5 ef775241a6d575ff10f7220dcfadf5d7 +DATA MD5 51b6d4e4debbd374fc184f2b691e0bb8 +DATA: 16344639 interested lines. MARKUP: 62827 items FileType FileNumber ValidLines Positives Negatives Templates --------------- ------------ ------------ ----------- ----------- ----------- 194 28318 71 418 90 @@ -113,7 +113,7 @@ FileType FileNumber ValidLines Positives Negatives Templat .markdown 3 139 3 1 .markerb 3 12 3 .marko 1 21 2 -.md 674 149300 740 2388 621 +.md 674 149300 741 2388 621 .mdx 3 549 7 .mjml 1 18 1 .mjs 22 4424 75 340 @@ -135,7 +135,7 @@ FileType FileNumber ValidLines Positives Negatives Templat .patch 4 109405 4 27 .pbxproj 1 941 2 .pem 48 1169 47 8 -.php 371 75710 128 1622 79 +.php 371 75710 130 1622 79 .pl 16 14727 7 33 .pm 3 744 7 .po 3 2994 15 @@ -153,7 +153,7 @@ FileType FileNumber ValidLines Positives Negatives Templat .pug 2 193 2 .purs 1 69 4 .pxd 1 150 5 2 -.py 890 291553 679 3303 726 +.py 890 291553 681 3303 726 .pyi 4 1361 9 .pyp 1 167 1 .pyx 2 1094 23 @@ -172,7 +172,7 @@ FileType FileNumber ValidLines Positives Negatives Templat .rs 31 9855 2 234 11 .rsc 1 691 1 .rsp 16 7101 20 10 27 -.rst 86 33980 70 323 68 +.rst 86 33980 71 323 68 .rules 1 6 2 .sample 2 25 3 4 4 .sbt 3 570 5 2 @@ -222,7 +222,7 @@ FileType FileNumber ValidLines Positives Negatives Templat .yml 419 36169 559 889 376 .zsh 6 872 12 .zsh-theme 1 97 1 -TOTAL: 10254 16344639 12221 50501 5104 +TOTAL: 10254 16344639 12227 50501 5104 credsweeper result_cnt : 0, lost_cnt : 0, true_cnt : 0, false_cnt : 0 Rules Positives Negatives Templates Reported TP FP TN FN FPR FNR ACC PRC RCL F1 ------------------------------ ----------- ----------- ----------- ---------- ---- ---- ----- ----- -------- -------- -------- ----- -------- ---- @@ -266,8 +266,9 @@ Salt 47 76 1 Secret 1297 1576 802 0 0 2378 1297 0.000000 1.000000 0.647075 0.000000 Seed 1 6 0 0 0 6 1 0.000000 1.000000 0.857143 0.000000 Slack Token 4 1 0 0 0 1 4 0.000000 1.000000 0.200000 0.000000 +Tencent WeChat API App ID 6 0 0 0 0 0 6 1.000000 0.000000 0.000000 Token 643 4170 454 0 0 4624 643 0.000000 1.000000 0.877919 0.000000 Twilio API Key 0 5 2 0 0 7 0 0.000000 1.000000 URL Credentials 210 156 216 0 0 372 210 0.000000 1.000000 0.639175 0.000000 UUID 1069 265 0 0 0 265 1069 0.000000 1.000000 0.198651 0.000000 - 12221 50501 5104 0 0 0 50501 12221 0.000000 1.000000 0.805156 0.000000 + 12227 50501 5104 0 0 0 50501 12227 0.000000 1.000000 0.805079 0.000000 diff --git a/.github/workflows/review.yml b/.github/workflows/review.yml index 923bce564..309392064 100644 --- a/.github/workflows/review.yml +++ b/.github/workflows/review.yml @@ -7,6 +7,9 @@ on: pull_request: branches: [ main ] +permissions: + contents: read + jobs: # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # @@ -17,8 +20,13 @@ jobs: steps: + - name: Harden Runner + uses: step-security/harden-runner@91182cccc01eb5e619899d80e4e971d6181294a7 # v2.10.1 + with: + egress-policy: audit + - name: Checkout CredData - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: ref: ${{ github.event.pull_request.head.sha }} @@ -32,7 +40,7 @@ jobs: - name: Cache head review id: cache-data - uses: actions/cache@v4 + uses: actions/cache@6849a6489940f00c2f30c0fb92c6274307ccb58a # v4.1.2 with: path: | review_head.txt @@ -42,7 +50,7 @@ jobs: - name: Cache tmp if: steps.cache-data.outputs.cache-hit != 'true' id: cache-tmp - uses: actions/cache@v4 + uses: actions/cache@6849a6489940f00c2f30c0fb92c6274307ccb58a # v4.1.2 with: path: tmp key: cred-data-${{ hashFiles('snapshot.yaml') }} @@ -53,7 +61,7 @@ jobs: - name: Set up Python 3.10 if: steps.cache-data.outputs.cache-hit != 'true' - uses: actions/setup-python@v4 + uses: actions/setup-python@65d7f2d534ac1bc67fcd62888c5f4f3d2cb2b236 # v4.7.1 with: python-version: "3.10" @@ -76,7 +84,7 @@ jobs: - name: Upload artifact if: always() - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: review_head path: | @@ -94,8 +102,13 @@ jobs: steps: + - name: Harden Runner + uses: step-security/harden-runner@91182cccc01eb5e619899d80e4e971d6181294a7 # v2.10.1 + with: + egress-policy: audit + - name: Checkout CredData - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: ref: ${{ github.event.pull_request.base.sha }} @@ -109,7 +122,7 @@ jobs: - name: Cache base review id: cache-data - uses: actions/cache@v4 + uses: actions/cache@6849a6489940f00c2f30c0fb92c6274307ccb58a # v4.1.2 with: path: | review_base.txt @@ -119,7 +132,7 @@ jobs: - name: Cache tmp if: steps.cache-data.outputs.cache-hit != 'true' id: cache-tmp - uses: actions/cache@v4 + uses: actions/cache@6849a6489940f00c2f30c0fb92c6274307ccb58a # v4.1.2 with: path: tmp key: cred-data-${{ hashFiles('snapshot.yaml') }} @@ -130,7 +143,7 @@ jobs: - name: Set up Python 3.10 if: steps.cache-data.outputs.cache-hit != 'true' - uses: actions/setup-python@v4 + uses: actions/setup-python@65d7f2d534ac1bc67fcd62888c5f4f3d2cb2b236 # v4.7.1 with: python-version: "3.10" @@ -145,7 +158,7 @@ jobs: - name: Upload artifact if: always() - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: review_base path: | @@ -162,11 +175,16 @@ jobs: steps: + - name: Harden Runner + uses: step-security/harden-runner@91182cccc01eb5e619899d80e4e971d6181294a7 # v2.10.1 + with: + egress-policy: audit + - name: install ansi2html run: sudo apt update && sudo apt install colorized-logs - name: Download all workflow run artifacts - uses: actions/download-artifact@v4 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 - name: Get diff for review run: | @@ -179,7 +197,7 @@ jobs: - name: Upload artifact if: always() - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: review_diff path: | @@ -194,13 +212,18 @@ jobs: steps: + - name: Harden Runner + uses: step-security/harden-runner@91182cccc01eb5e619899d80e4e971d6181294a7 # v2.10.1 + with: + egress-policy: audit + - name: Checkout CredData - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: ref: ${{ github.event.pull_request.head.sha }} - name: Set up Python 3.10 - uses: actions/setup-python@v4 + uses: actions/setup-python@65d7f2d534ac1bc67fcd62888c5f4f3d2cb2b236 # v4.7.1 with: python-version: "3.10" @@ -247,7 +270,7 @@ jobs: - name: Upload artifact if: always() - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: reports path: | diff --git a/download_data.py b/download_data.py index 1a70d6f36..1a4e5c1bd 100644 --- a/download_data.py +++ b/download_data.py @@ -333,6 +333,8 @@ def get_obfuscated_value(value, meta_row: MetaRow): obfuscated_value = value[:9] + generate_value(value[9:]) elif value.startswith("hooks.slack.com/services/"): obfuscated_value = "hooks.slack.com/services/" + generate_value(value[25:]) + elif value.startswith("wx") and 18 == len(value): + obfuscated_value = "wx" + generate_value(value[2:]) elif ".apps.googleusercontent.com" in value: pos = value.index(".apps.googleusercontent.com") obfuscated_value = generate_value(value[:pos]) + ".apps.googleusercontent.com" + generate_value( diff --git a/meta/a0abd87c.csv b/meta/a0abd87c.csv index a6f95ffdf..06874eac4 100644 --- a/meta/a0abd87c.csv +++ b/meta/a0abd87c.csv @@ -132,3 +132,8 @@ Id,FileID,Domain,RepoName,FilePath,LineStart,LineEnd,GroundTruth,WithWords,Value 130470,5246398e,GitHub,a0abd87c,data/a0abd87c/src/5246398e.py,42,42,F,F,,,F,F,,,,,0.00,,F,F,F,Password 130471,ccaf232a,GitHub,a0abd87c,data/a0abd87c/src/ccaf232a.rst,8,8,F,F,,,F,F,,,,,0.00,,F,F,F,Password 134306,dcdf77ab,GitHub,a0abd87c,data/a0abd87c/src/dcdf77ab.rst,15,15,F,F,-1,-1,F,F,,,,,0.0,0,F,F,F,Key +1479649,ee18e1cb,GitHub,a0abd87c,data/a0abd87c/test/ee18e1cb.py,24,24,T,F,13,31,F,F,,,,,0.0,0,F,F,F,Tencent WeChat API App ID +1479650,fcb28afe,GitHub,a0abd87c,data/a0abd87c/other/fcb28afe.md,145,145,T,F,33,51,F,F,,,,,0.0,0,F,F,F,Tencent WeChat API App ID +1479651,ccaf232a,GitHub,a0abd87c,data/a0abd87c/src/ccaf232a.rst,88,88,T,F,26,44,F,F,,,,,0.0,0,F,F,F,Tencent WeChat API App ID +1479652,5246398e,GitHub,a0abd87c,data/a0abd87c/src/5246398e.py,153,153,T,F,26,44,F,F,,,,,0.0,0,F,F,F,Tencent WeChat API App ID + diff --git a/meta/e3377359.csv b/meta/e3377359.csv index e46b83f20..0d758815e 100644 --- a/meta/e3377359.csv +++ b/meta/e3377359.csv @@ -119,3 +119,6 @@ Id,FileID,Domain,RepoName,FilePath,LineStart,LineEnd,GroundTruth,WithWords,Value 138197,719d25bb,GitHub,e3377359,data/e3377359/src/719d25bb.php,298,298,F,F,297,329,F,F,,,,WrongPos,0.0,0,F,F,F,Bitbucket Client ID:Bitbucket Client Secret 138198,bc555d60,GitHub,e3377359,data/e3377359/src/bc555d60.php,52,52,T,T,540,555,F,F,Any,,,WrongPos,4.33,24,F,F,F,Key 138199,bc555d60,GitHub,e3377359,data/e3377359/src/bc555d60.php,52,52,T,T,312,344,F,F,Any,,,WrongPos,4.33,24,F,F,F,Bitbucket Client ID:Bitbucket Client Secret +1479647,87fccb9b,GitHub,e3377359,data/e3377359/src/87fccb9b.php,357,357,T,F,78,96,F,F,,,,,0.0,0,F,F,F,Tencent WeChat API App ID +1479648,87fccb9b,GitHub,e3377359,data/e3377359/src/87fccb9b.php,357,357,T,F,204,222,F,F,,,,,0.0,0,F,F,F,Tencent WeChat API App ID + diff --git a/requirements.txt b/requirements.txt index c8deb02f9..11cd56b64 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,12 +1,11 @@ -PyYAML==6.0.1 -GitPython==3.1.41 -virtualenv==20.25.0 +PyYAML==6.0.2 +GitPython==3.1.43 +virtualenv==20.27.1 -setuptools==69.0.3 -tabulate~=0.9.0 +setuptools==70.0.0 +tabulate==0.9.0 #credentialdigger==4.9.5 # review_data colorama==0.4.6 -