Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Removed api_validations #633

Merged
merged 5 commits into from
Dec 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 9 additions & 8 deletions .ci/benchmark.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
META MD5 d51d1f5107d0906adfd81b9fd6467597
DATA MD5 5e46a76147ee32073b0d587f80684f86
META MD5 30ecf5f4796a36b60ca12cb702152bab
DATA MD5 9ac09dae7d8873d53e1fbf18da2d71c4
DATA: 16329853 interested lines. MARKUP: 59549 items
FileType FileNumber ValidLines Positives Negatives Templates
--------------- ------------ ------------ ----------- ----------- -----------
Expand Down Expand Up @@ -55,7 +55,7 @@ FileType FileNumber ValidLines Positives Negatives Templat
.erb 13 323 26
.erl 4 96 7
.ex 25 4968 5 98 5
.example 17 1838 69 38 51
.example 17 1838 74 36 51
.exs 24 4842 8 187 4
.ext 5 211 1 4 2
.fsproj 1 75 1 2
Expand Down Expand Up @@ -222,8 +222,8 @@ FileType FileNumber ValidLines Positives Negatives Templat
.yml 418 36057 522 910 376
.zsh 6 872 12
.zsh-theme 1 97 1
TOTAL: 10003 16329853 11851 46611 5084
credsweeper result_cnt : 11610, lost_cnt : 0, true_cnt : 11346, false_cnt : 264
TOTAL: 10003 16329853 11856 46609 5084
credsweeper result_cnt : 11613, lost_cnt : 0, true_cnt : 11349, false_cnt : 264
Rules Positives Negatives Templates Reported TP FP TN FN FPR FNR ACC PRC RCL F1
------------------------------ ----------- ----------- ----------- ---------- ----- ---- ----- ---- -------- -------- -------- -------- -------- --------
API 130 3166 188 125 123 2 3352 7 0.000596 0.053846 0.997417 0.984000 0.946154 0.964706
Expand All @@ -236,7 +236,7 @@ Azure Access Token 19 0 0 1
BASE64 Private Key 12 4 0 12 12 0 4 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000
BASE64 encoded PEM Private Key 7 0 0 5 5 0 0 2 0.285714 0.714286 1.000000 0.714286 0.833333
Bitbucket Client ID 19 53 0 75 19 53 0 0 1.000000 0.000000 0.263889 0.263889 1.000000 0.417582
Bitbucket Client Secret 27 66 1 97 27 67 0 0 1.000000 0.000000 0.287234 0.287234 1.000000 0.446281
Bitbucket Client Secret 28 66 1 98 28 67 0 0 1.000000 0.000000 0.294737 0.294737 1.000000 0.455285
CMD ConvertTo-SecureString 13 4 0 13 13 0 4 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000
CMD Password 21 128 6 18 18 0 134 3 0.000000 0.142857 0.980645 1.000000 0.857143 0.923077
CMD Secret 1 1 0 1 1 0 1 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000
Expand All @@ -257,7 +257,7 @@ Grafana Provisioned API Key 22 1 0
JSON Web Token 170 61 0 131 131 0 61 39 0.000000 0.229412 0.831169 1.000000 0.770588 0.870432
Jira / Confluence PAT token 0 4 0 0 0 4 0 0.000000 1.000000
Jira 2FA 15 6 1 12 12 0 7 3 0.000000 0.200000 0.863636 1.000000 0.800000 0.888889
Key 3909 15717 485 3944 3893 51 16151 16 0.003148 0.004093 0.996668 0.987069 0.995907 0.991468
Key 3911 15715 485 3944 3893 51 16149 18 0.003148 0.004602 0.996569 0.987069 0.995398 0.991216
Nonce 93 49 0 91 90 1 48 3 0.020408 0.032258 0.971831 0.989011 0.967742 0.978261
Other 9 7447 5 0 0 7452 9 0.000000 1.000000 0.998794 0.000000
PEM Private Key 1019 1483 0 1023 1019 4 1479 0 0.002697 0.000000 0.998401 0.996090 1.000000 0.998041
Expand All @@ -266,9 +266,10 @@ Salt 47 76 1 4
Secret 1297 1576 802 1288 1283 5 2373 14 0.002103 0.010794 0.994830 0.996118 0.989206 0.992650
Seed 1 6 0 0 0 6 1 0.000000 1.000000 0.857143 0.000000
Slack Token 4 1 0 4 4 0 1 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000
Stripe Credentials 2 0 0 2 2 0 0 0 0.000000 1.000000 1.000000 1.000000 1.000000
Tencent WeChat API App ID 6 0 0 6 6 0 0 0 0.000000 1.000000 1.000000 1.000000 1.000000
Token 644 4170 454 617 615 2 4622 29 0.000433 0.045031 0.994115 0.996759 0.954969 0.975416
Twilio Credentials 30 39 0 30 30 0 39 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000
URL Credentials 210 157 215 205 205 0 372 5 0.000000 0.023810 0.991409 1.000000 0.976190 0.987952
UUID 1075 265 0 1074 1073 1 264 2 0.003774 0.001860 0.997761 0.999069 0.998140 0.998604
11851 46611 5084 11623 11346 264 46347 505 0.005664 0.042612 0.986846 0.977261 0.957388 0.967222
11856 46609 5084 11626 11349 264 46345 507 0.005664 0.042763 0.986813 0.977267 0.957237 0.967148
2 changes: 0 additions & 2 deletions credsweeper/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,8 @@
DataContentProvider, \
TextContentProvider
from credsweeper.ml_model.ml_validator import MlValidator
from credsweeper.validations.apply_validation import ApplyValidation

__all__ = [
'ApplyValidation', #
'ByteContentProvider', #
'ContentProvider', #
'CredSweeper', #
Expand Down
6 changes: 0 additions & 6 deletions credsweeper/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,11 +196,6 @@ def get_arguments() -> Namespace:
dest="ml_providers",
required=False,
metavar="STR")
parser.add_argument("--api_validation",
help="add credential api validation option to credsweeper pipeline. "
"External API is used to reduce FP for some rule types.",
dest="api_validation",
action="store_true")
parser.add_argument("--jobs",
"-j",
help="number of parallel processes to use (default: 1)",
Expand Down Expand Up @@ -297,7 +292,6 @@ def scan(args: Namespace, content_provider: AbstractProvider, json_filename: Opt

credsweeper = CredSweeper(rule_path=args.rule_path,
config_path=args.config_path,
api_validation=args.api_validation,
json_filename=json_filename,
xlsx_filename=xlsx_filename,
color=args.color,
Expand Down
23 changes: 1 addition & 22 deletions credsweeper/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
from credsweeper.file_handler.text_content_provider import TextContentProvider
from credsweeper.scanner import Scanner
from credsweeper.utils import Util
from credsweeper.validations.apply_validation import ApplyValidation

logger = logging.getLogger(__name__)

Expand All @@ -40,7 +39,6 @@ class CredSweeper:
def __init__(self,
rule_path: Union[None, str, Path] = None,
config_path: Optional[str] = None,
api_validation: bool = False,
json_filename: Union[None, str, Path] = None,
xlsx_filename: Union[None, str, Path] = None,
color: bool = False,
Expand Down Expand Up @@ -69,8 +67,6 @@ def __init__(self,
validation was the grained candidate model on machine learning
config_path: optional str variable, path of CredSweeper config file
default built-in config is used if None
api_validation: optional boolean variable, specifying the need of
parallel API validation
json_filename: optional string variable, path to save result
to json
xlsx_filename: optional string variable, path to save result
Expand Down Expand Up @@ -100,7 +96,6 @@ def __init__(self,
raise RuntimeError(f"Severity level provided: {severity}"
f" -- must be one of: {' | '.join([i.value for i in Severity])}")
config_dict = self._get_config_dict(config_path=config_path,
api_validation=api_validation,
use_filters=use_filters,
find_by_ext=find_by_ext,
depth=depth,
Expand Down Expand Up @@ -141,7 +136,6 @@ def _get_config_path(config_path: Optional[str]) -> Path:
def _get_config_dict(
self, #
config_path: Optional[str], #
api_validation: bool, #
use_filters: bool, #
find_by_ext: bool, #
depth: int, #
Expand All @@ -151,8 +145,6 @@ def _get_config_dict(
exclude_lines: Optional[List[str]], #
exclude_values: Optional[List[str]]) -> Dict[str, Any]:
config_dict = Util.json_load(self._get_config_path(config_path))
config_dict["validation"] = {}
config_dict["validation"]["api_validation"] = api_validation
config_dict["use_filters"] = use_filters
config_dict["find_by_ext"] = find_by_ext
config_dict["size_limit"] = size_limit
Expand Down Expand Up @@ -272,14 +264,7 @@ def scan(self, content_providers: Sequence[Union[DiffContentProvider, TextConten
def __single_job_scan(self, content_providers: Sequence[Union[DiffContentProvider, TextContentProvider]]) -> None:
"""Performs scan in main thread"""
all_cred = self.files_scan(content_providers)
if self.config.api_validation:
api_validation = ApplyValidation()
for cred in all_cred:
logger.info("Run API Validation")
cred.api_validation = api_validation.validate(cred)
self.credential_manager.add_credential(cred)
else:
self.credential_manager.set_credentials(all_cred)
self.credential_manager.set_credentials(all_cred)

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

Expand All @@ -293,8 +278,6 @@ def __multi_jobs_scan(self, content_providers: Sequence[Union[DiffContentProvide
if "SILENCE" == self.__log_level:
logging.addLevelName(60, "SILENCE")
log_kwargs["level"] = self.__log_level
# providers_map: List[Sequence[Union[DiffContentProvider, TextContentProvider]]] = \
# [content_providers[x::self.pool_count] for x in range(self.pool_count)]
with multiprocessing.get_context("spawn").Pool(processes=self.pool_count,
initializer=self.pool_initializer,
initargs=(log_kwargs, )) as pool:
Expand All @@ -303,10 +286,6 @@ def __multi_jobs_scan(self, content_providers: Sequence[Union[DiffContentProvide
for x in range(self.pool_count))):
for cred in scan_results:
self.credential_manager.add_credential(cred)
if self.config.api_validation:
logger.info("Run API Validation")
api_validation = ApplyValidation()
api_validation.validate_credentials(pool, self.credential_manager)
except KeyboardInterrupt:
pool.terminate()
pool.join()
Expand Down
1 change: 0 additions & 1 deletion credsweeper/config/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ def __init__(self, config: Dict[str, Any]) -> None:
self.check_for_literals: bool = config["check_for_literals"]
self.not_allowed_path_pattern = re.compile(f"{Util.get_regex_combine_or(self.NOT_ALLOWED_PATH)}",
flags=re.IGNORECASE)
self.api_validation: bool = config["validation"]["api_validation"]
self.use_filters: bool = config["use_filters"]
self.line_data_output: List[str] = config["line_data_output"]
self.candidate_output: List[str] = config["candidate_output"]
Expand Down
17 changes: 0 additions & 17 deletions credsweeper/credentials/candidate.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
from credsweeper.common.constants import KeyValidationOption, Severity, Confidence
from credsweeper.config import Config
from credsweeper.credentials.line_data import LineData
from credsweeper.validations.validation import Validation


class Candidate:
Expand All @@ -31,19 +30,15 @@ def __init__(self,
rule_name: str,
severity: Severity,
config: Optional[Config] = None,
validations: List[Validation] = None,
use_ml: bool = False,
confidence: Confidence = Confidence.MODERATE) -> None:
self.line_data_list = line_data_list
self.patterns = patterns
self.rule_name = rule_name
self.severity = severity
self.config = config
self.validations: List[Validation] = validations if validations is not None else []
self.use_ml = use_ml
self.confidence = confidence

self.api_validation = KeyValidationOption.NOT_AVAILABLE
self.ml_validation = KeyValidationOption.NOT_AVAILABLE
self.ml_probability: Optional[float] = None

Expand All @@ -52,7 +47,6 @@ def compare(self, other: 'Candidate') -> bool:
if self.rule_name == other.rule_name \
and self.severity == other.severity \
and self.confidence == other.confidence \
and self.api_validation == other.api_validation \
and self.use_ml == other.use_ml \
and self.ml_validation == other.ml_validation \
and self.ml_probability == other.ml_probability \
Expand All @@ -79,22 +73,12 @@ def _encode(value: Any) -> Any:
else:
return value

def is_api_validation_available(self) -> bool:
"""Check if current credential candidate can be validated with external API.

Return:
True if any validation available, False otherwise

"""
return len(self.validations) > 0

def to_str(self, subtext: bool = False, hashed: bool = False) -> str:
"""Represent candidate with subtext or|and hashed values"""
return f"rule: {self.rule_name}" \
f" | severity: {self.severity.value}" \
f" | confidence: {self.confidence.value}" \
f" | line_data_list: [{', '.join([x.to_str(subtext, hashed) for x in self.line_data_list])}]" \
f" | api_validation: {self.api_validation.name}" \
f" | ml_validation: {self.ml_validation.name}"

def __str__(self):
Expand All @@ -111,7 +95,6 @@ def to_json(self, hashed: bool, subtext: bool) -> Dict:

"""
full_output = {
"api_validation": self.api_validation.name,
"ml_validation": self.ml_validation.name,
"patterns": [pattern.pattern for pattern in self.patterns],
"ml_probability": self.ml_probability,
Expand Down
Loading
Loading