Skip to content

Commit

Permalink
Filter for line which is UU encoded (#611)
Browse files Browse the repository at this point in the history
* new filter and test

* password applied

* upd rules doc

* fix

* style&etc

* upd
  • Loading branch information
babenek authored Oct 9, 2024
1 parent 20c72e1 commit 8e7191c
Show file tree
Hide file tree
Showing 19 changed files with 638 additions and 236 deletions.
2 changes: 1 addition & 1 deletion credsweeper/common/keyword_pattern.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
class KeywordPattern:
"""Pattern set of keyword types"""
key_left = r"(\\[nrt])?"\
r"(?P<variable>(([`'\"]+[^:='\"`}<>\\/&?]*|[^:='\"`}<>\s()\\/&?]*)" \
r"(?P<variable>(([`'\"]+[^:='\"`}<>\\/&?]*|[^:='\"`}<>\s()\\/&?;,]*)" \
r"(?P<keyword>"
# there will be inserted a keyword
key_right = r")" \
Expand Down
1 change: 1 addition & 0 deletions credsweeper/filters/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from credsweeper.filters.line_git_binary_check import LineGitBinaryCheck
from credsweeper.filters.line_specific_key_check import LineSpecificKeyCheck
from credsweeper.filters.line_uue_part_check import LineUUEPartCheck
from credsweeper.filters.value_allowlist_check import ValueAllowlistCheck
from credsweeper.filters.value_array_dictionary_check import ValueArrayDictionaryCheck
from credsweeper.filters.value_atlassian_token_check import ValueAtlassianTokenCheck
Expand Down
7 changes: 6 additions & 1 deletion credsweeper/filters/group/password_keyword.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,16 @@
from credsweeper.filters import ValueDictionaryValueLengthCheck, LineGitBinaryCheck
from credsweeper.filters import ValueSplitKeywordCheck
from credsweeper.filters.group import Group
from credsweeper.filters.line_uue_part_check import LineUUEPartCheck


class PasswordKeyword(Group):
"""PasswordKeyword"""

def __init__(self, config: Config) -> None:
super().__init__(config, GroupType.KEYWORD)
self.filters.extend([ValueDictionaryValueLengthCheck(), ValueSplitKeywordCheck(), LineGitBinaryCheck()])
self.filters.extend(
[ValueDictionaryValueLengthCheck(),
ValueSplitKeywordCheck(),
LineGitBinaryCheck(),
LineUUEPartCheck()])
2 changes: 1 addition & 1 deletion credsweeper/filters/line_git_binary_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
True, if need to filter candidate and False if left
"""
if line_data.line is None:
if not line_data.line:
return True
if 66 < target.line_strip_len:
return False
Expand Down
44 changes: 44 additions & 0 deletions credsweeper/filters/line_uue_part_check.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import re

from credsweeper.config import Config
from credsweeper.credentials import LineData
from credsweeper.file_handler.analysis_target import AnalysisTarget
from credsweeper.filters import Filter


class LineUUEPartCheck(Filter):
    """Filter candidates located inside a block of full-length uuencoded lines.

    Only maximal uuencode lines are recognised: the letter 'M' followed by
    exactly 60 characters from '!' through '`' (61 characters in total).
    A single matching line is not enough to filter a candidate; at least one
    directly adjacent line must have the same shape as well.
    """

    # 'M' plus exactly 60 characters in the range '!' .. '`'
    uue_string = re.compile(r"^M[!-`]{60}$")

    def __init__(self, config: Config = None) -> None:
        """Create the filter; ``config`` is accepted but not used."""

    def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
        """Decide whether the candidate sits on a line of a uuencoded block.

        Args:
            line_data: credential candidate data
            target: multiline target from which line data was obtained

        Return:
            True when the candidate must be filtered out: either the candidate
            has no line, or its line and at least one neighbouring line look
            like full-length uuencoded lines. False when the candidate is kept.

        """
        # a candidate without a line cannot be analysed - filter it out
        if not line_data.line:
            return True

        # only lines of exactly 61 characters can be maximal uuencode lines
        if target.line_len != 61:
            return False

        pattern = LineUUEPartCheck.uue_string
        if not pattern.match(target.line):
            return False

        # one matching line may be a coincidence, so require a matching
        # neighbour: the line before and/or the line after
        position = line_data.line_pos
        all_lines = target.lines

        if position > 0 and pattern.match(all_lines[position - 1]):
            return True

        if position + 1 < len(all_lines) and pattern.match(all_lines[position + 1]):
            return True

        return False
10 changes: 4 additions & 6 deletions credsweeper/rules/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,6 @@
- token
- secret
- key
- ":"
- "/"
- "="
-
- 암호
- 암호화
Expand All @@ -35,15 +32,16 @@
- ValuePatternCheck
- ValueDictionaryKeywordCheck
- LineGitBinaryCheck
- LineUUEPartCheck
- ValueFilePathCheck
- ValueHexNumberCheck
min_line_len: 10
required_substrings:
- pass
- sword
- ":"
- "/"
- "="
- pw
- p/w
- paasw
- 비밀번호
- 비번
- 패스워드
Expand Down
6 changes: 3 additions & 3 deletions tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@
NEGLIGIBLE_ML_THRESHOLD = 0.0001

# credentials count after scan
SAMPLES_CRED_COUNT: int = 396
SAMPLES_CRED_LINE_COUNT: int = 414
SAMPLES_CRED_COUNT: int = 397
SAMPLES_CRED_LINE_COUNT: int = 415

# credentials count after post-processing
SAMPLES_POST_CRED_COUNT: int = 349
SAMPLES_POST_CRED_COUNT: int = 354

# with option --doc
SAMPLES_IN_DOC = 430
Expand Down
Loading

0 comments on commit 8e7191c

Please sign in to comment.