Samsung · babenek · Dec 16, 2024 · Dec 13, 2024 · Dec 13, 2024 · Dec 13, 2024
@@ -1,5 +1,5 @@
-META MD5 d51d1f5107d0906adfd81b9fd6467597
-DATA MD5 5e46a76147ee32073b0d587f80684f86
+META MD5 30ecf5f4796a36b60ca12cb702152bab
+DATA MD5 9ac09dae7d8873d53e1fbf18da2d71c4
 DATA: 16329853 interested lines. MARKUP: 59549 items
 FileType           FileNumber    ValidLines    Positives    Negatives    Templates
 ---------------  ------------  ------------  -----------  -----------  -----------
@@ -55,7 +55,7 @@ FileType           FileNumber    ValidLines    Positives    Negatives    Templat
 .erb                       13           323                        26
 .erl                        4            96                         7
 .ex                        25          4968            5           98            5
-.example                   17          1838           69           38           51
+.example                   17          1838           74           36           51
 .exs                       24          4842            8          187            4
 .ext                        5           211            1            4            2
 .fsproj                     1            75            1            2
@@ -222,8 +222,8 @@ FileType           FileNumber    ValidLines    Positives    Negatives    Templat
 .yml                      418         36057          522          910          376
 .zsh                        6           872                        12
 .zsh-theme                  1            97                         1
-TOTAL:                  10003      16329853        11851        46611         5084
-credsweeper result_cnt : 11610, lost_cnt : 0, true_cnt : 11346, false_cnt : 264
+TOTAL:                  10003      16329853        11856        46609         5084
+credsweeper result_cnt : 11613, lost_cnt : 0, true_cnt : 11349, false_cnt : 264
 Rules                             Positives    Negatives    Templates    Reported     TP    FP     TN    FN       FPR       FNR       ACC       PRC       RCL        F1
 ------------------------------  -----------  -----------  -----------  ----------  -----  ----  -----  ----  --------  --------  --------  --------  --------  --------
 API                                     130         3166          188         125    123     2   3352     7  0.000596  0.053846  0.997417  0.984000  0.946154  0.964706
@@ -236,7 +236,7 @@ Azure Access Token                       19            0            0          1
 BASE64 Private Key                       12            4            0          12     12     0      4     0  0.000000  0.000000  1.000000  1.000000  1.000000  1.000000
 BASE64 encoded PEM Private Key            7            0            0           5      5     0      0     2            0.285714  0.714286  1.000000  0.714286  0.833333
 Bitbucket Client ID                      19           53            0          75     19    53      0     0  1.000000  0.000000  0.263889  0.263889  1.000000  0.417582
-Bitbucket Client Secret                  27           66            1          97     27    67      0     0  1.000000  0.000000  0.287234  0.287234  1.000000  0.446281
+Bitbucket Client Secret                  28           66            1          98     28    67      0     0  1.000000  0.000000  0.294737  0.294737  1.000000  0.455285
 CMD ConvertTo-SecureString               13            4            0          13     13     0      4     0  0.000000  0.000000  1.000000  1.000000  1.000000  1.000000
 CMD Password                             21          128            6          18     18     0    134     3  0.000000  0.142857  0.980645  1.000000  0.857143  0.923077
 CMD Secret                                1            1            0           1      1     0      1     0  0.000000  0.000000  1.000000  1.000000  1.000000  1.000000
@@ -257,7 +257,7 @@ Grafana Provisioned API Key              22            1            0
 JSON Web Token                          170           61            0         131    131     0     61    39  0.000000  0.229412  0.831169  1.000000  0.770588  0.870432
 Jira / Confluence PAT token               0            4            0                  0     0      4     0  0.000000            1.000000
 Jira 2FA                                 15            6            1          12     12     0      7     3  0.000000  0.200000  0.863636  1.000000  0.800000  0.888889
-Key                                    3909        15717          485        3944   3893    51  16151    16  0.003148  0.004093  0.996668  0.987069  0.995907  0.991468
+Key                                    3911        15715          485        3944   3893    51  16149    18  0.003148  0.004602  0.996569  0.987069  0.995398  0.991216
 Nonce                                    93           49            0          91     90     1     48     3  0.020408  0.032258  0.971831  0.989011  0.967742  0.978261
 Other                                     9         7447            5                  0     0   7452     9  0.000000  1.000000  0.998794            0.000000
 PEM Private Key                        1019         1483            0        1023   1019     4   1479     0  0.002697  0.000000  0.998401  0.996090  1.000000  0.998041
@@ -266,9 +266,10 @@ Salt                                     47           76            1          4
 Secret                                 1297         1576          802        1288   1283     5   2373    14  0.002103  0.010794  0.994830  0.996118  0.989206  0.992650
 Seed                                      1            6            0                  0     0      6     1  0.000000  1.000000  0.857143            0.000000
 Slack Token                               4            1            0           4      4     0      1     0  0.000000  0.000000  1.000000  1.000000  1.000000  1.000000
+Stripe Credentials                        2            0            0           2      2     0      0     0            0.000000  1.000000  1.000000  1.000000  1.000000
 Tencent WeChat API App ID                 6            0            0           6      6     0      0     0            0.000000  1.000000  1.000000  1.000000  1.000000
 Token                                   644         4170          454         617    615     2   4622    29  0.000433  0.045031  0.994115  0.996759  0.954969  0.975416
 Twilio Credentials                       30           39            0          30     30     0     39     0  0.000000  0.000000  1.000000  1.000000  1.000000  1.000000
 URL Credentials                         210          157          215         205    205     0    372     5  0.000000  0.023810  0.991409  1.000000  0.976190  0.987952
 UUID                                   1075          265            0        1074   1073     1    264     2  0.003774  0.001860  0.997761  0.999069  0.998140  0.998604
-                                      11851        46611         5084       11623  11346   264  46347   505  0.005664  0.042612  0.986846  0.977261  0.957388  0.967222
+                                      11856        46609         5084       11626  11349   264  46345   507  0.005664  0.042763  0.986813  0.977267  0.957237  0.967148
@@ -4,10 +4,8 @@
     DataContentProvider, \
     TextContentProvider
 from credsweeper.ml_model.ml_validator import MlValidator
-from credsweeper.validations.apply_validation import ApplyValidation
 
 __all__ = [
-    'ApplyValidation',  #
     'ByteContentProvider',  #
     'ContentProvider',  #
     'CredSweeper',  #

@@ -196,11 +196,6 @@ def get_arguments() -> Namespace:
                         dest="ml_providers",
                         required=False,
                         metavar="STR")
-    parser.add_argument("--api_validation",
-                        help="add credential api validation option to credsweeper pipeline. "
-                        "External API is used to reduce FP for some rule types.",
-                        dest="api_validation",
-                        action="store_true")
     parser.add_argument("--jobs",
                         "-j",
                         help="number of parallel processes to use (default: 1)",
@@ -297,7 +292,6 @@ def scan(args: Namespace, content_provider: AbstractProvider, json_filename: Opt
 
         credsweeper = CredSweeper(rule_path=args.rule_path,
                                   config_path=args.config_path,
-                                  api_validation=args.api_validation,
                                   json_filename=json_filename,
                                   xlsx_filename=xlsx_filename,
                                   color=args.color,

@@ -20,7 +20,6 @@
 from credsweeper.file_handler.text_content_provider import TextContentProvider
 from credsweeper.scanner import Scanner
 from credsweeper.utils import Util
-from credsweeper.validations.apply_validation import ApplyValidation
 
 logger = logging.getLogger(__name__)
 
@@ -40,7 +39,6 @@ class CredSweeper:
     def __init__(self,
                  rule_path: Union[None, str, Path] = None,
                  config_path: Optional[str] = None,
-                 api_validation: bool = False,
                  json_filename: Union[None, str, Path] = None,
                  xlsx_filename: Union[None, str, Path] = None,
                  color: bool = False,
@@ -69,8 +67,6 @@ def __init__(self,
                 validation was the grained candidate model on machine learning
             config_path: optional str variable, path of CredSweeper config file
                 default built-in config is used if None
-            api_validation: optional boolean variable, specifying the need of
-                parallel API validation
             json_filename: optional string variable, path to save result
                 to json
             xlsx_filename: optional string variable, path to save result
@@ -100,7 +96,6 @@ def __init__(self,
             raise RuntimeError(f"Severity level provided: {severity}"
                                f" -- must be one of: {' | '.join([i.value for i in Severity])}")
         config_dict = self._get_config_dict(config_path=config_path,
-                                            api_validation=api_validation,
                                             use_filters=use_filters,
                                             find_by_ext=find_by_ext,
                                             depth=depth,
@@ -141,7 +136,6 @@ def _get_config_path(config_path: Optional[str]) -> Path:
     def _get_config_dict(
             self,  #
             config_path: Optional[str],  #
-            api_validation: bool,  #
             use_filters: bool,  #
             find_by_ext: bool,  #
             depth: int,  #
@@ -151,8 +145,6 @@ def _get_config_dict(
             exclude_lines: Optional[List[str]],  #
             exclude_values: Optional[List[str]]) -> Dict[str, Any]:
         config_dict = Util.json_load(self._get_config_path(config_path))
-        config_dict["validation"] = {}
-        config_dict["validation"]["api_validation"] = api_validation
         config_dict["use_filters"] = use_filters
         config_dict["find_by_ext"] = find_by_ext
         config_dict["size_limit"] = size_limit
@@ -272,14 +264,7 @@ def scan(self, content_providers: Sequence[Union[DiffContentProvider, TextConten
     def __single_job_scan(self, content_providers: Sequence[Union[DiffContentProvider, TextContentProvider]]) -> None:
         """Performs scan in main thread"""
         all_cred = self.files_scan(content_providers)
-        if self.config.api_validation:
-            api_validation = ApplyValidation()
-            for cred in all_cred:
-                logger.info("Run API Validation")
-                cred.api_validation = api_validation.validate(cred)
-                self.credential_manager.add_credential(cred)
-        else:
-            self.credential_manager.set_credentials(all_cred)
+        self.credential_manager.set_credentials(all_cred)
 
     # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
 
@@ -293,8 +278,6 @@ def __multi_jobs_scan(self, content_providers: Sequence[Union[DiffContentProvide
             if "SILENCE" == self.__log_level:
                 logging.addLevelName(60, "SILENCE")
             log_kwargs["level"] = self.__log_level
-        # providers_map: List[Sequence[Union[DiffContentProvider, TextContentProvider]]] = \
-        #     [content_providers[x::self.pool_count] for x in range(self.pool_count)]
         with multiprocessing.get_context("spawn").Pool(processes=self.pool_count,
                                                        initializer=self.pool_initializer,
                                                        initargs=(log_kwargs, )) as pool:
@@ -303,10 +286,6 @@ def __multi_jobs_scan(self, content_providers: Sequence[Union[DiffContentProvide
                                                                           for x in range(self.pool_count))):
                     for cred in scan_results:
                         self.credential_manager.add_credential(cred)
-                if self.config.api_validation:
-                    logger.info("Run API Validation")
-                    api_validation = ApplyValidation()
-                    api_validation.validate_credentials(pool, self.credential_manager)
             except KeyboardInterrupt:
                 pool.terminate()
                 pool.join()

@@ -29,7 +29,6 @@ def __init__(self, config: Dict[str, Any]) -> None:
         self.check_for_literals: bool = config["check_for_literals"]
         self.not_allowed_path_pattern = re.compile(f"{Util.get_regex_combine_or(self.NOT_ALLOWED_PATH)}",
                                                    flags=re.IGNORECASE)
-        self.api_validation: bool = config["validation"]["api_validation"]
         self.use_filters: bool = config["use_filters"]
         self.line_data_output: List[str] = config["line_data_output"]
         self.candidate_output: List[str] = config["candidate_output"]

@@ -6,7 +6,6 @@
 from credsweeper.common.constants import KeyValidationOption, Severity, Confidence
 from credsweeper.config import Config
 from credsweeper.credentials.line_data import LineData
-from credsweeper.validations.validation import Validation
 
 
 class Candidate:
@@ -31,19 +30,15 @@ def __init__(self,
                  rule_name: str,
                  severity: Severity,
                  config: Optional[Config] = None,
-                 validations: List[Validation] = None,
                  use_ml: bool = False,
                  confidence: Confidence = Confidence.MODERATE) -> None:
         self.line_data_list = line_data_list
         self.patterns = patterns
         self.rule_name = rule_name
         self.severity = severity
         self.config = config
-        self.validations: List[Validation] = validations if validations is not None else []
         self.use_ml = use_ml
         self.confidence = confidence
-
-        self.api_validation = KeyValidationOption.NOT_AVAILABLE
         self.ml_validation = KeyValidationOption.NOT_AVAILABLE
         self.ml_probability: Optional[float] = None
 
@@ -52,7 +47,6 @@ def compare(self, other: 'Candidate') -> bool:
         if self.rule_name == other.rule_name \
                 and self.severity == other.severity \
                 and self.confidence == other.confidence \
-                and self.api_validation == other.api_validation \
                 and self.use_ml == other.use_ml \
                 and self.ml_validation == other.ml_validation \
                 and self.ml_probability == other.ml_probability \
@@ -79,22 +73,12 @@ def _encode(value: Any) -> Any:
         else:
             return value
 
-    def is_api_validation_available(self) -> bool:
-        """Check if current credential candidate can be validated with external API.
-
-        Return:
-            True if any validation available, False otherwise
-
-        """
-        return len(self.validations) > 0
-
     def to_str(self, subtext: bool = False, hashed: bool = False) -> str:
         """Represent candidate with subtext or|and hashed values"""
         return f"rule: {self.rule_name}" \
                f" | severity: {self.severity.value}" \
                f" | confidence: {self.confidence.value}" \
                f" | line_data_list: [{', '.join([x.to_str(subtext, hashed) for x in self.line_data_list])}]" \
-               f" | api_validation: {self.api_validation.name}" \
                f" | ml_validation: {self.ml_validation.name}"
 
     def __str__(self):
@@ -111,7 +95,6 @@ def to_json(self, hashed: bool, subtext: bool) -> Dict:
 
         """
         full_output = {
-            "api_validation": self.api_validation.name,
             "ml_validation": self.ml_validation.name,
             "patterns": [pattern.pattern for pattern in self.patterns],
             "ml_probability": self.ml_probability,