From 5bd74275645e946acdc5eda980ed71eda2d1fea0 Mon Sep 17 00:00:00 2001 From: Roman Babenko Date: Thu, 26 Oct 2023 17:52:32 +0300 Subject: [PATCH 1/5] removed extra keys --- credsweeper/rules/config.yaml | 7 ------- 1 file changed, 7 deletions(-) diff --git a/credsweeper/rules/config.yaml b/credsweeper/rules/config.yaml index 71b7c5b1c..1b5d1c085 100644 --- a/credsweeper/rules/config.yaml +++ b/credsweeper/rules/config.yaml @@ -277,7 +277,6 @@ values: - (^|[^.0-9A-Za-z_/+-])(?PAIza[0-9A-Za-z_-]{35})([^=0-9A-Za-z_/+-]|$) filter_type: GeneralPattern - use_ml: false validations: - GoogleApiKeyValidation required_substrings: @@ -291,7 +290,6 @@ - (?P[0-9]+\-[0-9A-Za-z_]{32}\.apps\.googleusercontent\.com) - (?[0-9a-zA-Z_-]{24})([^=0-9A-Za-z_/+-]|$) filter_type: GeneralPattern - use_ml: false validations: - GoogleMultiValidation required_substrings: @@ -348,7 +346,6 @@ values: - (^|[^.0-9A-Za-z_/+-])(?P[0-9a-zA-Z]{32}-us[0-9]{1,2})([^=0-9A-Za-z_/+-]|$) filter_type: GeneralPattern - use_ml: false validations: - MailChimpKeyValidation required_substrings: @@ -385,7 +382,6 @@ values: - (?Paccess_token\$production\$[0-9a-z]{16}\$[0-9a-z]{32})([^=0-9A-Za-z_/+-]|$) filter_type: GeneralPattern - use_ml: false required_substrings: - access_token$production$ min_line_len: 72 @@ -405,7 +401,6 @@ values: - (?Psk_live_[0-9a-z]{32})([^=0-9A-Za-z_/+-]|$) filter_type: GeneralPattern - use_ml: false required_substrings: - sk_live_ min_line_len: 40 @@ -428,7 +423,6 @@ values: - (?PSG\.[\w_]{16,32}\.[\w_]{16,64}) filter_type: GeneralPattern - use_ml: false required_substrings: - SG. min_line_len: 34 @@ -523,7 +517,6 @@ values: - (?Psq0csp-[0-9A-Za-z_-]{43})([^=0-9A-Za-z_/+-]|$) filter_type: GeneralPattern - use_ml: false required_substrings: - sq0csp min_line_len: 50 From 7634bdd18de6ce9a6c7596062afa5234888324f3 Mon Sep 17 00:00:00 2001 From: Roman Babenko Date: Thu, 26 Oct 2023 18:07:00 +0300 Subject: [PATCH 2/5] removed ml for well-known prefixes patterns --- credsweeper/rules/config.yaml | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/credsweeper/rules/config.yaml b/credsweeper/rules/config.yaml index 1b5d1c085..716931278 100644 --- a/credsweeper/rules/config.yaml +++ b/credsweeper/rules/config.yaml @@ -194,7 +194,6 @@ values: - (^|[^.0-9A-Za-z_/+-])(?P(ABIA|ACCA|AGPA|AIDA|AIPA|AKIA|ANPA|ANVA|AROA|APKA|ASCA|ASIA)[0-9A-Z]{16,17})([^=0-9A-Za-z_/+-]|$) filter_type: GeneralPattern - use_ml: true required_substrings: - A min_line_len: 20 @@ -207,7 +206,6 @@ - (^|[^.0-9A-Za-z_/+-])(?P(AKIA|ASIA)[0-9A-Z]{16,17})([^=0-9A-Za-z_/+-]|$) - (?P[0-9a-zA-Z/+]{40}) filter_type: GeneralPattern - use_ml: true required_substrings: - AKIA - ASIA @@ -219,7 +217,6 @@ values: - (^|[^.0-9A-Za-z_/+-])(?Pamzn\.mws\.[0-9a-z]{8}-[0-9a-z]{4}-[0-9a-z]{4}-[0-9a-z]{4}-[0-9a-z]{12})([^=0-9A-Za-z_/+-]|$) filter_type: GeneralPattern - use_ml: true required_substrings: - amzn min_line_len: 30 @@ -242,7 +239,6 @@ values: - (^|[^.0-9A-Za-z_/+-])(?Pdt0[a-zA-Z]{1}[0-9]{2}\.[A-Z0-9]{24}\.[A-Z0-9]{64})([^=0-9A-Za-z_/+-]|$) filter_type: GeneralPattern - use_ml: true required_substrings: - dt0 min_line_len: 90 @@ -253,7 +249,6 @@ values: - (^|[^.0-9A-Za-z_/+-])(?PEAAC[0-9A-Za-z]{27,}) filter_type: GeneralPattern - use_ml: true required_substrings: - EAAC min_line_len: 31 @@ -302,7 +297,6 @@ values: - (^|[^.0-9A-Za-z_/+-])(?Pya29\.[0-9A-Za-z_-]{22,}) filter_type: GeneralPattern - use_ml: true required_substrings: - ya29. min_line_len: 27 @@ -313,7 +307,6 @@ values: - (?i)(?Pheroku(.{0,20})?[0-9a-f]{8}(-[0-9a-f]{4})+-[0-9a-f]{12})([^=0-9A-Za-z_/+-]|$) filter_type: GeneralPattern - use_ml: true required_substrings: - heroku min_line_len: 24 @@ -324,7 +317,6 @@ values: - (^|[^.0-9A-Za-z_/+-])(?PIGQVJ[\w]{100,}) filter_type: GeneralPattern - use_ml: true required_substrings: - IGQVJ min_line_len: 105 @@ -358,7 +350,6 @@ values: - (^|[^.0-9A-Za-z_/+-])(?Pkey-[0-9a-zA-Z]{32})([^=0-9A-Za-z_/+-]|$) filter_type: GeneralPattern - use_ml: true required_substrings: - key- min_line_len: 36 @@ -443,7 +434,6 @@ values: - (^|[^.0-9A-Za-z_/+-])(?Pxox[a|b|p|r|o|s]\-[-a-zA-Z0-9]{10,250}) filter_type: GeneralPattern - use_ml: true validations: - SlackTokenValidation required_substrings: @@ -456,7 +446,6 @@ values: - (?Phooks\.slack\.com/services/T\w{8}/B\w{8}/\w{24}) filter_type: GeneralPattern - use_ml: true required_substrings: - hooks.slack.com/services/T min_line_len: 61 @@ -467,7 +456,6 @@ values: - (?Psk_live_[0-9a-zA-Z]{24})([^=0-9A-Za-z_/+-]|$) filter_type: GeneralPattern - use_ml: true validations: - StripeApiKeyValidation required_substrings: @@ -480,7 +468,6 @@ values: - (?Prk_live_[0-9a-zA-Z]{24})([^=0-9A-Za-z_/+-]|$) filter_type: GeneralPattern - use_ml: true required_substrings: - rk_live_ min_line_len: 32 @@ -491,7 +478,6 @@ values: - (^|[^.0-9A-Za-z_/+-])(?PEAAA[0-9A-Za-z_-]{60})([^=0-9A-Za-z_/+-]|$) filter_type: GeneralPattern - use_ml: true validations: - SquareAccessTokenValidation required_substrings: @@ -504,7 +490,6 @@ values: - (^|[^.0-9A-Za-z_/+-])(?Psq0[a-z]{3}-[0-9A-Za-z_-]{22})([^=0-9A-Za-z_/+-]|$) filter_type: GeneralPattern - use_ml: true validations: - SquareClientIdValidation required_substrings: @@ -539,7 +524,6 @@ values: - (^|[^.0-9A-Za-z_/+-])(?PSK[0-9a-fA-F]{32})([^=0-9A-Za-z_/+-]|$) filter_type: GeneralPattern - use_ml: true required_substrings: - SK min_line_len: 34 From 0f03592e0b452b57e4cb0f660d9fa5e9a1c90a0c Mon Sep 17 00:00:00 2001 From: Roman Babenko Date: Thu, 26 Oct 2023 18:52:09 +0300 Subject: [PATCH 3/5] tests fixed --- tests/__init__.py | 2 +- tests/data/depth_3.json | 192 +++++++++++++++++++++++++++------------- tests/data/doc.json | 72 +++++++-------- tests/data/output.json | 164 ++++++++++++++++++++++++---------- tests/test_app.py | 4 +- 5 files changed, 289 insertions(+), 145 deletions(-) diff --git a/tests/__init__.py b/tests/__init__.py index 4a0d5fd44..c9a1df68c 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -8,7 +8,7 @@ SAMPLES_CRED_LINE_COUNT: int = 402 # credentials count after post-processing -SAMPLES_POST_CRED_COUNT: int = 293 +SAMPLES_POST_CRED_COUNT: int = 296 # with option --doc SAMPLES_IN_DOC = 426 diff --git a/tests/data/depth_3.json b/tests/data/depth_3.json index 514f03140..993088e3e 100644 --- a/tests/data/depth_3.json +++ b/tests/data/depth_3.json @@ -97,8 +97,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 1.0, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "AWS Client ID", "severity": "high", "line_data_list": [ @@ -121,8 +121,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 1.0, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "AWS Client ID", "severity": "high", "line_data_list": [ @@ -145,8 +145,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 1.0, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "AWS Multi", "severity": "high", "line_data_list": [ @@ -184,8 +184,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 1.0, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "AWS Client ID", "severity": "high", "line_data_list": [ @@ -208,8 +208,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 1.0, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "AWS Multi", "severity": "high", "line_data_list": [ @@ -247,8 +247,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.91871, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "AWS MWS Key", "severity": "high", "line_data_list": [ @@ -271,8 +271,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.91871, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Key", "severity": "medium", "line_data_list": [ @@ -4831,8 +4831,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99108, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Dynatrace API Token", "severity": "high", "line_data_list": [ @@ -4903,8 +4903,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.83427, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Facebook Access Token", "severity": "high", "line_data_list": [ @@ -4927,8 +4927,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.83427, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Token", "severity": "medium", "line_data_list": [ @@ -5326,8 +5326,80 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99757, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "Auth", + "severity": "medium", + "line_data_list": [ + { + "line": "google_oauth_key = \"ya29.gi_reo_gi_crackle_ln22\"", + "line_num": 1, + "path": "tests/samples/google_oauth_key", + "info": "tests/samples/google_oauth_key|RAW", + "value": "ya29.gi_reo_gi_crackle_ln22", + "value_start": 20, + "value_end": 47, + "variable": "google_oauth_key", + "entropy_validation": { + "iterator": "BASE36_CHARS", + "entropy": 3.1797273164975133, + "valid": true + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "Google OAuth Access Token", + "severity": "high", + "line_data_list": [ + { + "line": "google_oauth_key = \"ya29.gi_reo_gi_crackle_ln22\"", + "line_num": 1, + "path": "tests/samples/google_oauth_key", + "info": "tests/samples/google_oauth_key|RAW", + "value": "ya29.gi_reo_gi_crackle_ln22", + "value_start": 20, + "value_end": 47, + "variable": null, + "entropy_validation": { + "iterator": "BASE36_CHARS", + "entropy": 3.1797273164975133, + "valid": true + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "Key", + "severity": "medium", + "line_data_list": [ + { + "line": "google_oauth_key = \"ya29.gi_reo_gi_crackle_ln22\"", + "line_num": 1, + "path": "tests/samples/google_oauth_key", + "info": "tests/samples/google_oauth_key|RAW", + "value": "ya29.gi_reo_gi_crackle_ln22", + "value_start": 20, + "value_end": 47, + "variable": "google_oauth_key", + "entropy_validation": { + "iterator": "BASE36_CHARS", + "entropy": 3.1797273164975133, + "valid": true + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Google OAuth Access Token", "severity": "high", "line_data_list": [ @@ -5422,8 +5494,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.95517, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Heroku API Key", "severity": "high", "line_data_list": [ @@ -5470,8 +5542,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.71488, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Instagram Access Token", "severity": "high", "line_data_list": [ @@ -5902,8 +5974,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99189, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "MailGun API Key", "severity": "high", "line_data_list": [ @@ -5926,8 +5998,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99998, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "AWS Client ID", "severity": "high", "line_data_list": [ @@ -5950,8 +6022,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99998, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "AWS Multi", "severity": "high", "line_data_list": [ @@ -6013,8 +6085,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99998, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "AWS Client ID", "severity": "high", "line_data_list": [ @@ -6037,8 +6109,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99998, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "AWS Multi", "severity": "high", "line_data_list": [ @@ -6148,8 +6220,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99994, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "AWS Client ID", "severity": "high", "line_data_list": [ @@ -6172,8 +6244,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99994, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Password", "severity": "medium", "line_data_list": [ @@ -8146,8 +8218,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99994, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "AWS Client ID", "severity": "high", "line_data_list": [ @@ -8290,8 +8362,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.89421, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Slack Token", "severity": "high", "line_data_list": [ @@ -8314,8 +8386,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.6364, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Slack Webhook", "severity": "high", "line_data_list": [ @@ -8338,8 +8410,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.7944, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Square Access Token", "severity": "high", "line_data_list": [ @@ -8362,8 +8434,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.75821, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Square Client ID", "severity": "medium", "line_data_list": [ @@ -8770,8 +8842,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.85074, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Google OAuth Access Token", "severity": "high", "line_data_list": [ @@ -8818,8 +8890,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.85074, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Google OAuth Access Token", "severity": "high", "line_data_list": [ @@ -8866,8 +8938,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.76194, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Facebook Access Token", "severity": "high", "line_data_list": [ @@ -8890,8 +8962,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.76194, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Facebook Access Token", "severity": "high", "line_data_list": [ @@ -8938,8 +9010,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.6423, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Twilio API Key", "severity": "high", "line_data_list": [ diff --git a/tests/data/doc.json b/tests/data/doc.json index 5c28311c4..245ce25fa 100644 --- a/tests/data/doc.json +++ b/tests/data/doc.json @@ -184,8 +184,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 1.0, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "AWS Client ID", "severity": "high", "line_data_list": [ @@ -208,8 +208,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 1.0, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "AWS Multi", "severity": "high", "line_data_list": [ @@ -9742,8 +9742,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99108, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Dynatrace API Token", "severity": "high", "line_data_list": [ @@ -10237,8 +10237,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99757, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Google OAuth Access Token", "severity": "high", "line_data_list": [ @@ -10333,8 +10333,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.95517, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Heroku API Key", "severity": "high", "line_data_list": [ @@ -10357,8 +10357,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.71488, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Instagram Access Token", "severity": "high", "line_data_list": [ @@ -10525,8 +10525,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99189, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "MailGun API Key", "severity": "high", "line_data_list": [ @@ -10549,8 +10549,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99998, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "AWS Client ID", "severity": "high", "line_data_list": [ @@ -10573,8 +10573,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99998, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "AWS Multi", "severity": "high", "line_data_list": [ @@ -10636,8 +10636,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99998, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "AWS Client ID", "severity": "high", "line_data_list": [ @@ -10660,8 +10660,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99998, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "AWS Multi", "severity": "high", "line_data_list": [ @@ -11437,8 +11437,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.89421, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Slack Token", "severity": "high", "line_data_list": [ @@ -11461,8 +11461,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.6364, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Slack Webhook", "severity": "high", "line_data_list": [ @@ -11485,8 +11485,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.7944, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Square Access Token", "severity": "high", "line_data_list": [ @@ -11509,8 +11509,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.75821, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Square Client ID", "severity": "medium", "line_data_list": [ @@ -11773,8 +11773,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.85074, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Google OAuth Access Token", "severity": "high", "line_data_list": [ @@ -11821,8 +11821,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.76194, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Facebook Access Token", "severity": "high", "line_data_list": [ @@ -11869,8 +11869,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.6423, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Twilio API Key", "severity": "high", "line_data_list": [ diff --git a/tests/data/output.json b/tests/data/output.json index d2e2f7d19..9bc0eded3 100644 --- a/tests/data/output.json +++ b/tests/data/output.json @@ -97,8 +97,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 1.0, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "AWS Client ID", "severity": "high", "line_data_list": [ @@ -121,8 +121,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 1.0, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "AWS Client ID", "severity": "high", "line_data_list": [ @@ -145,8 +145,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 1.0, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "AWS Multi", "severity": "high", "line_data_list": [ @@ -184,8 +184,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 1.0, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "AWS Client ID", "severity": "high", "line_data_list": [ @@ -208,8 +208,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 1.0, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "AWS Multi", "severity": "high", "line_data_list": [ @@ -247,8 +247,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.91871, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "AWS MWS Key", "severity": "high", "line_data_list": [ @@ -271,8 +271,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.91871, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Key", "severity": "medium", "line_data_list": [ @@ -4807,8 +4807,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99108, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Dynatrace API Token", "severity": "high", "line_data_list": [ @@ -4831,8 +4831,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.83427, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Facebook Access Token", "severity": "high", "line_data_list": [ @@ -4855,8 +4855,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.83427, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Token", "severity": "medium", "line_data_list": [ @@ -5254,8 +5254,80 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99757, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "Auth", + "severity": "medium", + "line_data_list": [ + { + "line": "google_oauth_key = \"ya29.gi_reo_gi_crackle_ln22\"", + "line_num": 1, + "path": "tests/samples/google_oauth_key", + "info": "", + "value": "ya29.gi_reo_gi_crackle_ln22", + "value_start": 20, + "value_end": 47, + "variable": "google_oauth_key", + "entropy_validation": { + "iterator": "BASE36_CHARS", + "entropy": 3.1797273164975133, + "valid": true + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "Google OAuth Access Token", + "severity": "high", + "line_data_list": [ + { + "line": "google_oauth_key = \"ya29.gi_reo_gi_crackle_ln22\"", + "line_num": 1, + "path": "tests/samples/google_oauth_key", + "info": "", + "value": "ya29.gi_reo_gi_crackle_ln22", + "value_start": 20, + "value_end": 47, + "variable": null, + "entropy_validation": { + "iterator": "BASE36_CHARS", + "entropy": 3.1797273164975133, + "valid": true + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "Key", + "severity": "medium", + "line_data_list": [ + { + "line": "google_oauth_key = \"ya29.gi_reo_gi_crackle_ln22\"", + "line_num": 1, + "path": "tests/samples/google_oauth_key", + "info": "", + "value": "ya29.gi_reo_gi_crackle_ln22", + "value_start": 20, + "value_end": 47, + "variable": "google_oauth_key", + "entropy_validation": { + "iterator": "BASE36_CHARS", + "entropy": 3.1797273164975133, + "valid": true + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Google OAuth Access Token", "severity": "high", "line_data_list": [ @@ -5350,8 +5422,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.95517, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Heroku API Key", "severity": "high", "line_data_list": [ @@ -5398,8 +5470,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.71488, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Instagram Access Token", "severity": "high", "line_data_list": [ @@ -5782,8 +5854,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99189, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "MailGun API Key", "severity": "high", "line_data_list": [ @@ -5806,8 +5878,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99998, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "AWS Client ID", "severity": "high", "line_data_list": [ @@ -5830,8 +5902,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99998, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "AWS Multi", "severity": "high", "line_data_list": [ @@ -5893,8 +5965,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99998, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "AWS Client ID", "severity": "high", "line_data_list": [ @@ -5917,8 +5989,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99998, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "AWS Multi", "severity": "high", "line_data_list": [ @@ -6910,8 +6982,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.89421, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Slack Token", "severity": "high", "line_data_list": [ @@ -6934,8 +7006,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.6364, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Slack Webhook", "severity": "high", "line_data_list": [ @@ -6958,8 +7030,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.7944, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Square Access Token", "severity": "high", "line_data_list": [ @@ -6982,8 +7054,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.75821, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Square Client ID", "severity": "medium", "line_data_list": [ @@ -7198,8 +7270,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.6423, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Twilio API Key", "severity": "high", "line_data_list": [ diff --git a/tests/test_app.py b/tests/test_app.py index b5dfdc044..03e58231a 100644 --- a/tests/test_app.py +++ b/tests/test_app.py @@ -129,7 +129,7 @@ def test_it_works_with_multiline_in_patch_p(self) -> None: / value: 'AKIAQWADE5R42RDZ4JEM' / entropy_validation: BASE64_CHARS 3.684184 False] / api_validation: NOT_AVAILABLE - / ml_validation: VALIDATED_KEY + / ml_validation: NOT_AVAILABLE rule: AWS Multi / severity: high / line_data_list: @@ -144,7 +144,7 @@ def test_it_works_with_multiline_in_patch_p(self) -> None: / value: 'V84C7sDU001tFFodKU95USNy97TkqXymnvsFmYhQ' / entropy_validation: BASE64_CHARS 4.784184 True] / api_validation: NOT_AVAILABLE - / ml_validation: VALIDATED_KEY + / ml_validation: NOT_AVAILABLE rule: Token / severity: medium / line_data_list: From 682f7a0e3d41e43591d4f7070fa6d096a558b583 Mon Sep 17 00:00:00 2001 From: Roman Babenko Date: Mon, 30 Oct 2023 07:41:32 +0200 Subject: [PATCH 4/5] benchmark scores fix --- cicd/benchmark.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cicd/benchmark.txt b/cicd/benchmark.txt index 3bd78ff61..85fd06529 100644 --- a/cicd/benchmark.txt +++ b/cicd/benchmark.txt @@ -16,10 +16,10 @@ Category TP FP TN FN FPR FNR -------------------------- ---- ---- -------- ---- --------- --------- -------- -------- -------- -------- Authentication Key & Token 54 4 28 16 0.125 0.228571 0.803922 0.931034 0.771429 0.84375 Generic Secret 973 3 215 83 0.0137615 0.0785985 0.932496 0.996926 0.921402 0.957677 -Generic Token 287 7 596 46 0.0116086 0.138138 0.943376 0.97619 0.861862 0.91547 +Generic Token 289 7 596 44 0.0116086 0.132132 0.945513 0.976351 0.867868 0.918919 Other 818 750 63395 258 0.0116923 0.239777 0.984545 0.521684 0.760223 0.618759 Password 995 130 4150 410 0.0303738 0.291815 0.905013 0.884444 0.708185 0.786561 Predefined Pattern 309 2 40 17 0.0476191 0.0521472 0.94837 0.993569 0.947853 0.970173 Private Key 967 0 4 34 0.033966 0.966169 1 0.966034 0.982724 Seed, Salt, Nonce 36 2 6 4 0.25 0.1 0.875 0.947368 0.9 0.923077 - 4439 898 19428253 868 4.622e-05 0.163558 0.999909 0.831741 0.836442 0.834085 + 4441 898 19428253 866 4.622e-05 0.163181 0.999909 0.831804 0.836819 0.834304 From 4498b0b9cbcb9e46c79102c2e932855a63645648 Mon Sep 17 00:00:00 2001 From: Roman Babenko Date: Mon, 30 Oct 2023 08:28:55 +0200 Subject: [PATCH 5/5] benchmark scores fix 2 --- cicd/benchmark.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cicd/benchmark.txt b/cicd/benchmark.txt index 85fd06529..394e5d888 100644 --- a/cicd/benchmark.txt +++ b/cicd/benchmark.txt @@ -10,8 +10,8 @@ Predefined Pattern 326 2 40 Private Key 1001 1 3 Seed, Salt, Nonce 40 4 4 TOTAL: 5307 63688 5644 -Detected Credentials: 5993 -credsweeper result_cnt : 5337, lost_cnt : 0, true_cnt : 4439, false_cnt : 898 +Detected Credentials: 5997 +credsweeper result_cnt : 5339, lost_cnt : 0, true_cnt : 4441, false_cnt : 898 Category TP FP TN FN FPR FNR ACC PRC RCL F1 -------------------------- ---- ---- -------- ---- --------- --------- -------- -------- -------- -------- Authentication Key & Token 54 4 28 16 0.125 0.228571 0.803922 0.931034 0.771429 0.84375