Skip to content

Commit

Permalink
validators removed
Browse files (browse the repository at this point in the history)
  • Loading branch information
babenek committed Dec 13, 2024
1 parent cfebac7 commit 752defe
Show file tree
Hide file tree
Showing 57 changed files with 56 additions and 3,319 deletions.
2 changes: 0 additions & 2 deletions credsweeper/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,8 @@
DataContentProvider, \
TextContentProvider
from credsweeper.ml_model.ml_validator import MlValidator
from credsweeper.validations.apply_validation import ApplyValidation

__all__ = [
'ApplyValidation', #
'ByteContentProvider', #
'ContentProvider', #
'CredSweeper', #
Expand Down
6 changes: 0 additions & 6 deletions credsweeper/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,11 +196,6 @@ def get_arguments() -> Namespace:
dest="ml_providers",
required=False,
metavar="STR")
parser.add_argument("--api_validation",
help="add credential api validation option to credsweeper pipeline. "
"External API is used to reduce FP for some rule types.",
dest="api_validation",
action="store_true")
parser.add_argument("--jobs",
"-j",
help="number of parallel processes to use (default: 1)",
Expand Down Expand Up @@ -296,7 +291,6 @@ def scan(args: Namespace, content_provider: AbstractProvider, json_filename: Opt

credsweeper = CredSweeper(rule_path=args.rule_path,
config_path=args.config_path,
api_validation=args.api_validation,
json_filename=json_filename,
xlsx_filename=xlsx_filename,
hashed=args.hashed,
Expand Down
23 changes: 1 addition & 22 deletions credsweeper/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
from credsweeper.file_handler.text_content_provider import TextContentProvider
from credsweeper.scanner import Scanner
from credsweeper.utils import Util
from credsweeper.validations.apply_validation import ApplyValidation

logger = logging.getLogger(__name__)

Expand All @@ -39,7 +38,6 @@ class CredSweeper:
def __init__(self,
rule_path: Union[None, str, Path] = None,
config_path: Optional[str] = None,
api_validation: bool = False,
json_filename: Union[None, str, Path] = None,
xlsx_filename: Union[None, str, Path] = None,
hashed: bool = False,
Expand Down Expand Up @@ -67,8 +65,6 @@ def __init__(self,
validation was the grained candidate model on machine learning
config_path: optional str variable, path of CredSweeper config file
default built-in config is used if None
api_validation: optional boolean variable, specifying the need of
parallel API validation
json_filename: optional string variable, path to save result
to json
xlsx_filename: optional string variable, path to save result
Expand Down Expand Up @@ -97,7 +93,6 @@ def __init__(self,
raise RuntimeError(f"Severity level provided: {severity}"
f" -- must be one of: {' | '.join([i.value for i in Severity])}")
config_dict = self._get_config_dict(config_path=config_path,
api_validation=api_validation,
use_filters=use_filters,
find_by_ext=find_by_ext,
depth=depth,
Expand Down Expand Up @@ -137,7 +132,6 @@ def _get_config_path(config_path: Optional[str]) -> Path:
def _get_config_dict(
self, #
config_path: Optional[str], #
api_validation: bool, #
use_filters: bool, #
find_by_ext: bool, #
depth: int, #
Expand All @@ -147,8 +141,6 @@ def _get_config_dict(
exclude_lines: Optional[List[str]], #
exclude_values: Optional[List[str]]) -> Dict[str, Any]:
config_dict = Util.json_load(self._get_config_path(config_path))
config_dict["validation"] = {}
config_dict["validation"]["api_validation"] = api_validation
config_dict["use_filters"] = use_filters
config_dict["find_by_ext"] = find_by_ext
config_dict["size_limit"] = size_limit
Expand Down Expand Up @@ -268,14 +260,7 @@ def scan(self, content_providers: Sequence[Union[DiffContentProvider, TextConten
def __single_job_scan(self, content_providers: Sequence[Union[DiffContentProvider, TextContentProvider]]) -> None:
"""Performs scan in main thread"""
all_cred = self.files_scan(content_providers)
if self.config.api_validation:
api_validation = ApplyValidation()
for cred in all_cred:
logger.info("Run API Validation")
cred.api_validation = api_validation.validate(cred)
self.credential_manager.add_credential(cred)
else:
self.credential_manager.set_credentials(all_cred)
self.credential_manager.set_credentials(all_cred)

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

Expand All @@ -289,8 +274,6 @@ def __multi_jobs_scan(self, content_providers: Sequence[Union[DiffContentProvide
if "SILENCE" == self.__log_level:
logging.addLevelName(60, "SILENCE")
log_kwargs["level"] = self.__log_level
# providers_map: List[Sequence[Union[DiffContentProvider, TextContentProvider]]] = \
# [content_providers[x::self.pool_count] for x in range(self.pool_count)]
with multiprocessing.get_context("spawn").Pool(processes=self.pool_count,
initializer=self.pool_initializer,
initargs=(log_kwargs, )) as pool:
Expand All @@ -299,10 +282,6 @@ def __multi_jobs_scan(self, content_providers: Sequence[Union[DiffContentProvide
for x in range(self.pool_count))):
for cred in scan_results:
self.credential_manager.add_credential(cred)
if self.config.api_validation:
logger.info("Run API Validation")
api_validation = ApplyValidation()
api_validation.validate_credentials(pool, self.credential_manager)
except KeyboardInterrupt:
pool.terminate()
pool.join()
Expand Down
1 change: 0 additions & 1 deletion credsweeper/config/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ def __init__(self, config: Dict[str, Any]) -> None:
self.check_for_literals: bool = config["check_for_literals"]
self.not_allowed_path_pattern = re.compile(f"{Util.get_regex_combine_or(self.NOT_ALLOWED_PATH)}",
flags=re.IGNORECASE)
self.api_validation: bool = config["validation"]["api_validation"]
self.use_filters: bool = config["use_filters"]
self.line_data_output: List[str] = config["line_data_output"]
self.candidate_output: List[str] = config["candidate_output"]
Expand Down
17 changes: 0 additions & 17 deletions credsweeper/credentials/candidate.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
from credsweeper.common.constants import KeyValidationOption, Severity, Confidence
from credsweeper.config import Config
from credsweeper.credentials.line_data import LineData
from credsweeper.validations.validation import Validation


class Candidate:
Expand All @@ -31,19 +30,15 @@ def __init__(self,
rule_name: str,
severity: Severity,
config: Optional[Config] = None,
validations: List[Validation] = None,
use_ml: bool = False,
confidence: Confidence = Confidence.MODERATE) -> None:
self.line_data_list = line_data_list
self.patterns = patterns
self.rule_name = rule_name
self.severity = severity
self.config = config
self.validations: List[Validation] = validations if validations is not None else []
self.use_ml = use_ml
self.confidence = confidence

self.api_validation = KeyValidationOption.NOT_AVAILABLE
self.ml_validation = KeyValidationOption.NOT_AVAILABLE
self.ml_probability: Optional[float] = None

Expand All @@ -52,7 +47,6 @@ def compare(self, other: 'Candidate') -> bool:
if self.rule_name == other.rule_name \
and self.severity == other.severity \
and self.confidence == other.confidence \
and self.api_validation == other.api_validation \
and self.use_ml == other.use_ml \
and self.ml_validation == other.ml_validation \
and self.ml_probability == other.ml_probability \
Expand All @@ -79,22 +73,12 @@ def _encode(value: Any) -> Any:
else:
return value

def is_api_validation_available(self) -> bool:
"""Check if current credential candidate can be validated with external API.
Return:
True if any validation available, False otherwise
"""
return len(self.validations) > 0

def to_str(self, subtext: bool = False, hashed: bool = False) -> str:
"""Represent candidate with subtext or|and hashed values"""
return f"rule: {self.rule_name}" \
f" | severity: {self.severity.value}" \
f" | confidence: {self.confidence.value}" \
f" | line_data_list: [{', '.join([x.to_str(subtext, hashed) for x in self.line_data_list])}]" \
f" | api_validation: {self.api_validation.name}" \
f" | ml_validation: {self.ml_validation.name}"

def __str__(self):
Expand All @@ -111,7 +95,6 @@ def to_json(self, hashed: bool, subtext: bool) -> Dict:
"""
full_output = {
"api_validation": self.api_validation.name,
"ml_validation": self.ml_validation.name,
"patterns": [pattern.pattern for pattern in self.patterns],
"ml_probability": self.ml_probability,
Expand Down
56 changes: 4 additions & 52 deletions credsweeper/rules/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -256,8 +256,6 @@
- (?i)((git)[0-9A-Za-z_-]{0,80}(token|key|api)[0-9A-Za-z_-]{0,80}(\s)*(=|:|:=)(\s)*(["']?)(?P<value>[0-9a-z]{40})(["']?))
filter_type: GeneralPattern
use_ml: true
validations:
- GithubTokenValidation
required_substrings:
- git
min_line_len: 47
Expand All @@ -272,8 +270,6 @@
values:
- (?:(?<![0-9A-Za-z_])|\\[0abfnrtv]|(%|\\x)[0-9A-Fa-f]{2}|\\[0-7]{3}|\\[Uu]([0-9A-Fa-f]{4}){1,2}|\x1B\[[0-9;]{0,80}m)(?P<value>AIza[0-9A-Za-z_-]{35})
filter_type: GeneralPattern
validations:
- GoogleApiKeyValidation
required_substrings:
- AIza
min_line_len: 39
Expand All @@ -289,8 +285,6 @@
- (?P<value>[0-9]{3,80}-[0-9a-z_]{32}\.apps\.googleusercontent\.com)
- \b(?P<value>GOCSPX-[0-9A-Za-z_-]{28}|((?P<a>[A-Z])|(?P<b>[a-z])|(?P<c>[0-9_-])){24,80}(?(a)(?(b)(?(c)\b|(?!x)x)|(?!x)x)|(?!x)x))
filter_type: GeneralPattern
validations:
- GoogleMultiValidation
required_substrings:
- .apps.googleusercontent.com
min_line_len: 40
Expand Down Expand Up @@ -376,8 +370,6 @@
values:
- (?:(?<![0-9A-Za-z_-])|\\[0abfnrtv]|(%|\\x)[0-9A-Fa-f]{2}|\\[0-7]{3}|\\[Uu]([0-9A-Fa-f]{4}){1,2}|\x1B\[[0-9;]{0,80}m)(?P<value>[0-9A-Za-z_-]{32}-us[0-9]{1,2})
filter_type: GeneralPattern
validations:
- MailChimpKeyValidation
required_substrings:
- -us
min_line_len: 35
Expand Down Expand Up @@ -534,8 +526,6 @@
values:
- (?:(?<![0-9A-Za-z_-])|\\[0abfnrtv]|(%|\\x)[0-9A-Fa-f]{2}|\\[0-7]{3}|\\[Uu]([0-9A-Fa-f]{4}){1,2}|\x1B\[[0-9;]{0,80}m)(?P<value>xox[aboprst]\-[0-9A-Za-z-]{10,250})(?![0-9A-Za-z_-])
filter_type: GeneralPattern
validations:
- SlackTokenValidation
required_substrings:
- xox
min_line_len: 15
Expand All @@ -557,31 +547,15 @@
- code
- doc

- name: Stripe Standard API Key
- name: Stripe API Key
severity: high
confidence: strong
type: pattern
values:
- (?P<value>sk_live_[0-9A-Za-z_-]{24})(?![0-9A-Za-z_-])
- (?P<value>[prs]k_live_[0-9A-Za-z_-]{24})(?![0-9A-Za-z_-])
filter_type: GeneralPattern
validations:
- StripeApiKeyValidation
required_substrings:
- sk_live_
min_line_len: 32
target:
- code
- doc

- name: Stripe Restricted API Key
severity: high
confidence: strong
type: pattern
values:
- (?P<value>rk_live_[0-9A-Za-z_-]{24})(?![0-9A-Za-z_-])
filter_type: GeneralPattern
required_substrings:
- rk_live_
- k_live_
min_line_len: 32
target:
- code
Expand All @@ -596,45 +570,27 @@
filter_type:
- ValuePatternCheck
- ValueBase64PartCheck
validations:
- SquareAccessTokenValidation
required_substrings:
- EAAA
min_line_len: 64
target:
- code
- doc

- name: Square Client ID
- name: Square Credentials
severity: medium
confidence: strong
type: pattern
values:
- (?:(?<![0-9A-Za-z_-])|\\[0abfnrtv]|(%|\\x)[0-9A-Fa-f]{2}|\\[0-7]{3}|\\[Uu]([0-9A-Fa-f]{4}){1,2}|\x1B\[[0-9;]{0,80}m)(?P<value>sq0[a-z]{3}-[0-9A-Za-z_-]{22})(?![0-9A-Za-z_-])
filter_type: GeneralPattern
validations:
- SquareClientIdValidation
required_substrings:
- sq0
min_line_len: 29
target:
- code
- doc

- name: Square OAuth Secret
severity: high
confidence: strong
type: pattern
values:
- (?:(?<![0-9A-Za-z_-])|\\[0abfnrtv]|(%|\\x)[0-9A-Fa-f]{2}|\\[0-7]{3}|\\[Uu]([0-9A-Fa-f]{4}){1,2}|\x1B\[[0-9;]{0,80}m)(?P<value>sq0csp-[0-9A-Za-z_-]{43})(?![0-9A-Za-z_-])
filter_type: GeneralPattern
required_substrings:
- sq0csp
min_line_len: 50
target:
- code
- doc

- name: Token
severity: medium
confidence: moderate
Expand Down Expand Up @@ -821,8 +777,6 @@
- (?:(?<![0-9A-Za-z_-])|\\[0abfnrtv]|(%|\\x)[0-9A-Fa-f]{2}|\\[0-7]{3}|\\[Uu]([0-9A-Fa-f]{4}){1,2}|\x1B\[[0-9;]{0,80}m)(?P<value>gh[pousr]_[0-9A-Za-z_-]{36,255})
filter_type:
- ValueGitHubCheck
validations:
- GithubTokenValidation
required_substrings:
- ghp_
- gho_
Expand All @@ -841,8 +795,6 @@
values:
- (?:(?<![0-9A-Za-z_-])|\\[0abfnrtv]|(%|\\x)[0-9A-Fa-f]{2}|\\[0-7]{3}|\\[Uu]([0-9A-Fa-f]{4}){1,2}|\x1B\[[0-9;]{0,80}m)(?P<value>github_pat_[0-9A-Za-z_]{80,255})
filter_type: GeneralPattern
validations:
- GithubTokenValidation
required_substrings:
- github_pat_
min_line_len: 90
Expand Down
Loading

0 comments on commit 752defe

Please sign in to comment.