Skip to content

Commit

Permalink
Credential colorization in stdout export (#632)
Browse files Browse the repository at this point in the history
* output with colorization

* missed types lib, import optimization, extend test

* fix

* fix

* fix
  • Loading branch information
babenek authored Dec 13, 2024
1 parent cfebac7 commit 3938c7d
Show file tree
Hide file tree
Showing 14 changed files with 81 additions and 9 deletions.
2 changes: 2 additions & 0 deletions credsweeper/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,7 @@ def get_arguments() -> Namespace:
const="output.xlsx",
dest="xlsx_filename",
metavar="PATH")
parser.add_argument("--color", "-C", help="print results with colorization", action="store_const", const=True)
parser.add_argument("--hashed",
help="line, variable, value will be hashed in output",
action="store_const",
Expand Down Expand Up @@ -299,6 +300,7 @@ def scan(args: Namespace, content_provider: AbstractProvider, json_filename: Opt
api_validation=args.api_validation,
json_filename=json_filename,
xlsx_filename=xlsx_filename,
color=args.color,
hashed=args.hashed,
subtext=args.subtext,
sort_output=args.sort_output,
Expand Down
13 changes: 13 additions & 0 deletions credsweeper/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from typing import Any, List, Optional, Union, Dict, Sequence, Tuple

import pandas as pd
from colorama import Style

# Directory of credsweeper sources MUST be placed before imports to avoid circular import error
APP_PATH = Path(__file__).resolve().parent
Expand Down Expand Up @@ -42,6 +43,7 @@ def __init__(self,
api_validation: bool = False,
json_filename: Union[None, str, Path] = None,
xlsx_filename: Union[None, str, Path] = None,
color: bool = False,
hashed: bool = False,
subtext: bool = False,
sort_output: bool = False,
Expand Down Expand Up @@ -73,6 +75,7 @@ def __init__(self,
to json
xlsx_filename: optional string variable, path to save result
to xlsx
color: print results to stdout with colorization
hashed: use hash of line, value and variable instead of plain text
subtext: use subtext of line near variable-value like it is performed in ML
use_filters: boolean variable, specifying the need of rule filters
Expand Down Expand Up @@ -112,6 +115,7 @@ def __init__(self,
self.credential_manager = CredentialManager()
self.json_filename: Union[None, str, Path] = json_filename
self.xlsx_filename: Union[None, str, Path] = xlsx_filename
self.color = color
self.hashed = hashed
self.subtext = subtext
self.sort_output = sort_output
Expand Down Expand Up @@ -427,6 +431,15 @@ def export_results(self) -> None:
df = pd.DataFrame(data=data_list)
df.to_excel(self.xlsx_filename, index=False)

if self.color:
is_exported = True
for credential in credentials:
for line_data in credential.line_data_list:
# bright rule name and path or info
print(Style.BRIGHT + credential.rule_name +
f" {line_data.info or line_data.path}:{line_data.line_num}" + Style.RESET_ALL)
print(line_data.get_colored_line(hashed=self.hashed, subtext=self.subtext))

if is_exported is False:
for credential in credentials:
print(credential.to_str(hashed=self.hashed, subtext=self.subtext))
37 changes: 37 additions & 0 deletions credsweeper/credentials/line_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
from functools import cached_property
from typing import Any, Dict, Optional, Tuple

from colorama import Fore, Style

from credsweeper.common.constants import MAX_LINE_LENGTH, UTF_8, StartEnd, ML_HUNK
from credsweeper.config import Config
from credsweeper.utils import Util
Expand Down Expand Up @@ -414,3 +416,38 @@ def to_json(self, hashed: bool, subtext: bool) -> Dict:
}
reported_output = {k: v for k, v in full_output.items() if k in self.config.line_data_output}
return reported_output

def get_colored_line(self, hashed: bool, subtext: bool = False) -> str:
    """Return the line with value, separator and variable wrapped in ANSI color codes.

    Args:
        hashed: when True the plain line is not shown; the result of
            ``get_hash_or_subtext`` is returned instead, colored light green
        subtext: when True only a hunk of the line near the value is returned

    Return:
        the line (or its hashed form) decorated with colorama escape sequences

    """
    if hashed:
        # return colored hash
        cut_pos = StartEnd(self.value_start, self.value_end) if subtext else None
        return Fore.LIGHTGREEN_EX + self.get_hash_or_subtext(self.line, hashed, cut_pos) + Style.RESET_ALL
    # at least, value must present
    line = self.line[:self.value_start] \
        + Fore.LIGHTYELLOW_EX \
        + self.line[self.value_start:self.value_end] \
        + Style.RESET_ALL \
        + self.line[self.value_end:]
    # separator and variable may be missing; each is accepted only if it ends before the value starts
    spans = []
    if 0 <= self.separator_start < self.separator_end <= self.value_start:
        spans.append((self.separator_start, self.separator_end, Fore.LIGHTGREEN_EX))
    if 0 <= self.variable_start < self.variable_end <= self.value_start:
        spans.append((self.variable_start, self.variable_end, Fore.LIGHTBLUE_EX))
    # Offsets are measured on the plain line, so paint from right to left: an insertion then never
    # shifts text that still has to be sliced. A span overlapping an already painted one is skipped,
    # otherwise its slice would cut through the inserted escape codes.
    painted_from = self.value_start
    for start, end, color in sorted(spans, reverse=True):
        if end > painted_from:
            continue
        line = line[:start] + color + line[start:end] + Style.RESET_ALL + line[end:]
        painted_from = start
    if subtext:
        # display part of the text, centered around the start of the value, style reset at the end as a fallback
        # the position is shifted by the total length of the escape codes inserted above
        line = f"{Util.subtext(line, self.value_start + len(line) - len(self.line), ML_HUNK)}{Style.RESET_ALL}"
    return line
2 changes: 1 addition & 1 deletion credsweeper/deep_scanner/gzip_scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@
from pathlib import Path
from typing import List

from credsweeper.utils import Util
from credsweeper.credentials import Candidate
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
from credsweeper.file_handler.data_content_provider import DataContentProvider
from credsweeper.utils import Util

logger = logging.getLogger(__name__)

Expand Down
2 changes: 1 addition & 1 deletion credsweeper/file_handler/diff_content_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def __init__(
file_path: str, #
change_type: DiffRowType, #
diff: List[DiffDict]) -> None:
super().__init__(file_path=file_path, info=change_type.value)
super().__init__(file_path=file_path, info=f"{file_path}:{change_type.value}")
self.change_type = change_type
self.diff = diff

Expand Down
2 changes: 1 addition & 1 deletion credsweeper/filters/value_camel_case_check.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import re

from credsweeper.config import Config
from credsweeper.common import static_keyword_checklist
from credsweeper.config import Config
from credsweeper.credentials import LineData
from credsweeper.file_handler.analysis_target import AnalysisTarget
from credsweeper.filters import Filter
Expand Down
2 changes: 1 addition & 1 deletion credsweeper/filters/value_file_path_check.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from credsweeper.common.constants import Chars
from credsweeper.common import static_keyword_checklist
from credsweeper.common.constants import Chars
from credsweeper.config import Config
from credsweeper.credentials import LineData
from credsweeper.file_handler.analysis_target import AnalysisTarget
Expand Down
2 changes: 1 addition & 1 deletion credsweeper/ml_model/features/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
from credsweeper.ml_model.features.hartley_entropy import HartleyEntropy
from credsweeper.ml_model.features.has_html_tag import HasHtmlTag
from credsweeper.ml_model.features.is_secret_numeric import IsSecretNumeric
from credsweeper.ml_model.features.search_in_attribute import SearchInAttribute
from credsweeper.ml_model.features.reny_entropy import RenyiEntropy
from credsweeper.ml_model.features.rule_name import RuleName
from credsweeper.ml_model.features.search_in_attribute import SearchInAttribute
from credsweeper.ml_model.features.shannon_entropy import ShannonEntropy
from credsweeper.ml_model.features.word_in_line import WordInLine
from credsweeper.ml_model.features.word_in_path import WordInPath
Expand Down
2 changes: 1 addition & 1 deletion credsweeper/ml_model/ml_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@
import numpy as np
import onnxruntime as ort

import credsweeper.ml_model.features as features
from credsweeper.common.constants import ThresholdPreset, ML_HUNK
from credsweeper.credentials import Candidate, CandidateKey
import credsweeper.ml_model.features as features
from credsweeper.utils import Util

logger = logging.getLogger(__name__)
Expand Down
3 changes: 2 additions & 1 deletion docs/source/guide.rst
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ Get all argument list:
[--find-by-ext] [--depth POSITIVE_INT] [--no-filters] [--doc] [--ml_threshold FLOAT_OR_STR]
[--ml_batch_size POSITIVE_INT] [--ml_config PATH] [--ml_model PATH] [--ml_providers STR]
[--api_validation] [--jobs POSITIVE_INT] [--skip_ignored] [--save-json [PATH]]
[--save-xlsx [PATH]] [--hashed] [--subtext] [--sort] [--log LOG_LEVEL]
[--save-xlsx [PATH]] [--color] [--hashed] [--subtext] [--sort] [--log LOG_LEVEL]
[--size_limit SIZE_LIMIT]
[--banner] [--version]
options:
Expand Down Expand Up @@ -54,6 +54,7 @@ Get all argument list:
--skip_ignored parse .gitignore files and skip credentials from ignored objects
--save-json [PATH] save result to json file (default: output.json)
--save-xlsx [PATH] save result to xlsx file (default: output.xlsx)
--color, -C print results with colorization
--hashed line, variable, value will be hashed in output
--subtext line text will be stripped in 160 symbols but value and variable are kept
--sort enable output sorting
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ authors = [
dependencies = [
"base58",
"beautifulsoup4>=4.11.0",
"colorama",
"cryptography",
"GitPython",
"google_auth_oauthlib",
Expand Down
2 changes: 2 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ hatchling==1.26.3
# Common requirements
base58==2.1.1
beautifulsoup4==4.12.3
colorama==0.4.6
cryptography==43.0.3
GitPython==3.1.43
google-auth-oauthlib==1.2.1
Expand Down Expand Up @@ -45,6 +46,7 @@ pytest-cov
pytest-html
pytest-random-order
types-beautifulsoup4
types-colorama
types-PyYAML
types-requests
types-oauthlib
Expand Down
15 changes: 15 additions & 0 deletions tests/test_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,20 @@ def test_it_works_with_api_p(self) -> None:

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

def test_it_works_with_patch_color_p(self) -> None:
    """Scan a patch with --color and compare the whitespace-normalized colored stdout."""
    patch_file = str(SAMPLES_PATH / "password.patch")
    cli_args = ["--diff_path", patch_file, "--log", "silence", "--color"]
    _stdout, _stderr = self._m_credsweeper(cli_args)
    # drop the last token of the output (presumably the elapsed time value, which varies per run)
    tokens = _stdout.split()
    del tokens[-1:]
    output = " ".join(tokens)
    # whitespace is normalized, so only the sequence of tokens is compared
    expected = " ".join("""
\x1b[1mPassword .changes/1.16.98.json:added:3\x1b[0m
"\x1b[94mpassword\x1b[0m"\x1b[92m:\x1b[0m "\x1b[93mdkajco1\x1b[0m"
Added File Credentials: 1 Deleted File Credentials: 0 Time Elapsed:
""".split())
    self.assertEqual(expected, output)

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

def test_it_works_n(self) -> None:
_stdout, _stderr = self._m_credsweeper([])

Expand Down Expand Up @@ -243,6 +257,7 @@ def test_it_works_n(self) -> None:
" [--skip_ignored]" \
" [--save-json [PATH]]" \
" [--save-xlsx [PATH]]" \
" [--color]" \
" [--hashed]" \
" [--subtext]" \
" [--sort]" \
Expand Down
5 changes: 3 additions & 2 deletions tests/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,7 @@ def test_main_path_p(self, mock_get_arguments) -> None:
diff_path=[str(target_path)],
json_filename=os.path.join(tmp_dir, f"{__name__}.json"),
xlsx_filename=None,
color=False,
subtext=False,
hashed=False,
rule_path=None,
Expand Down Expand Up @@ -450,7 +451,7 @@ def test_tar_n(self) -> None:

def test_aws_multi_p(self) -> None:
content_provider: AbstractProvider = FilesProvider([SAMPLES_PATH / "aws_multi.md"])
cred_sweeper = CredSweeper(ml_threshold=0)
cred_sweeper = CredSweeper(ml_threshold=0, color=True, hashed=True)
cred_sweeper.run(content_provider=content_provider)
for i in cred_sweeper.credential_manager.get_credentials():
if "AWS Multi" == i.rule_name:
Expand Down Expand Up @@ -609,7 +610,7 @@ def test_yaml_n(self) -> None:
def test_encoded_p(self) -> None:
# test for finding credentials in ENCODED data
content_provider: AbstractProvider = FilesProvider([SAMPLES_PATH / "encoded_data"])
cred_sweeper = CredSweeper(depth=5, ml_threshold=0)
cred_sweeper = CredSweeper(depth=5, ml_threshold=0, color=True, subtext=True)
cred_sweeper.run(content_provider=content_provider)
found_credentials = cred_sweeper.credential_manager.get_credentials()
self.assertEqual(2, len(found_credentials))
Expand Down

0 comments on commit 3938c7d

Please sign in to comment.