From 385c0d8efeaa0715966fe0f4bee4ca86fce0a971 Mon Sep 17 00:00:00 2001
From: Roman Babenko
Date: Fri, 13 Dec 2024 07:28:27 +0200
Subject: [PATCH] output with colorization

---
 credsweeper/__main__.py                       |  2 ++
 credsweeper/app.py                            | 19 +++++++++++
 credsweeper/credentials/line_data.py          | 34 +++++++++++++++++++
 .../file_handler/diff_content_provider.py     |  2 +-
 docs/source/guide.rst                         |  3 +-
 pyproject.toml                                |  1 +
 requirements.txt                              |  1 +
 tests/test_app.py                             | 15 ++++++++
 tests/test_main.py                            |  5 +--
 9 files changed, 78 insertions(+), 4 deletions(-)

diff --git a/credsweeper/__main__.py b/credsweeper/__main__.py
index f025e5d51..b607a40a5 100644
--- a/credsweeper/__main__.py
+++ b/credsweeper/__main__.py
@@ -224,6 +224,7 @@ def get_arguments() -> Namespace:
                         const="output.xlsx",
                         dest="xlsx_filename",
                         metavar="PATH")
+    parser.add_argument("--color", "-C", help="print results with colorization", action="store_const", const=True)
     parser.add_argument("--hashed",
                         help="line, variable, value will be hashed in output",
                         action="store_const",
@@ -299,6 +300,7 @@ def scan(args: Namespace, content_provider: AbstractProvider, json_filename: Opt
                                   api_validation=args.api_validation,
                                   json_filename=json_filename,
                                   xlsx_filename=xlsx_filename,
+                                  color=args.color,
                                   hashed=args.hashed,
                                   subtext=args.subtext,
                                   sort_output=args.sort_output,
diff --git a/credsweeper/app.py b/credsweeper/app.py
index f60b28394..9373529b7 100644
--- a/credsweeper/app.py
+++ b/credsweeper/app.py
@@ -5,6 +5,8 @@
 from typing import Any, List, Optional, Union, Dict, Sequence, Tuple
 
 import pandas as pd
+from colorama import Fore
+from colorama import Style
 
 # Directory of credsweeper sources MUST be placed before imports to avoid circular import error
 APP_PATH = Path(__file__).resolve().parent
@@ -42,6 +44,7 @@ def __init__(self,
                  api_validation: bool = False,
                  json_filename: Union[None, str, Path] = None,
                  xlsx_filename: Union[None, str, Path] = None,
+                 color: bool = False,
                  hashed: bool = False,
                  subtext: bool = False,
                  sort_output: bool = False,
@@ -73,6 +76,7 @@ def __init__(self,
                 to json
             xlsx_filename: optional string variable, path to save result
                 to xlsx
+            color: print results to stdout with colorization
             hashed: use hash of line, value and variable instead plain text
             subtext: use subtext of line near variable-value like it performed in ML
             use_filters: boolean variable, specifying the need of rule filters
@@ -112,6 +116,7 @@ def __init__(self,
         self.credential_manager = CredentialManager()
         self.json_filename: Union[None, str, Path] = json_filename
         self.xlsx_filename: Union[None, str, Path] = xlsx_filename
+        self.color = color
         self.hashed = hashed
         self.subtext = subtext
         self.sort_output = sort_output
@@ -427,6 +432,20 @@ def export_results(self) -> None:
             df = pd.DataFrame(data=data_list)
             df.to_excel(self.xlsx_filename, index=False)
 
+        if self.color:
+            is_exported = True
+            for credential in credentials:
+                for line_data in credential.line_data_list:
+                    # header: rule name and location of the finding
+                    location = f"{line_data.info or line_data.path}:{line_data.line_num}"
+                    print(f"{Style.BRIGHT}{credential.rule_name} {location}{Style.RESET_ALL}")
+                    if self.hashed:
+                        # a hashed line has no parts to highlight - print it in one color
+                        hashed_line = line_data.get_hash_or_subtext(line_data.line, self.hashed)
+                        print(f"{Fore.LIGHTGREEN_EX}{hashed_line}{Style.RESET_ALL}")
+                    else:
+                        print(line_data.get_colored_line(self.subtext))
+
         if is_exported is False:
             for credential in credentials:
                 print(credential.to_str(hashed=self.hashed, subtext=self.subtext))
diff --git a/credsweeper/credentials/line_data.py b/credsweeper/credentials/line_data.py
index 92f801484..761724d8f 100644
--- a/credsweeper/credentials/line_data.py
+++ b/credsweeper/credentials/line_data.py
@@ -5,6 +5,8 @@
 from functools import cached_property
 from typing import Any, Dict, Optional, Tuple
 
+from colorama import Fore, Style
+
 from credsweeper.common.constants import MAX_LINE_LENGTH, UTF_8, StartEnd, ML_HUNK
 from credsweeper.config import Config
 from credsweeper.utils import Util
@@ -414,3 +416,35 @@ def to_json(self, hashed: bool, subtext: bool) -> Dict:
         }
         reported_output = {k: v for k, v in full_output.items() if k in self.config.line_data_output}
         return reported_output
+
+    def get_colored_line(self, subtext: bool = False) -> str:
+        """Return the line with value, separator and variable highlighted by ANSI colors
+
+        Separator and variable are highlighted only when they are located before the value.
+
+        Args:
+            subtext: cut the colored line to a hunk around the value instead of showing it whole
+
+        Return:
+            line text with embedded color escape sequences
+
+        """
+        # the value is always present and is the rightmost highlighted part
+        spans = [(self.value_start, self.value_end, Fore.LIGHTYELLOW_EX)]
+        # separator may be missing
+        if 0 <= self.separator_start < self.separator_end <= self.value_start:
+            spans.append((self.separator_start, self.separator_end, Fore.LIGHTGREEN_EX))
+        # variable may be missing
+        if 0 <= self.variable_start < self.variable_end <= self.value_start:
+            spans.append((self.variable_start, self.variable_end, Fore.LIGHTBLUE_EX))
+        line = self.line
+        # insert the codes from right to left, so offsets of the pending spans stay valid
+        for start, end, color in sorted(spans, reverse=True):
+            line = f"{line[:start]}{color}{line[start:end]}{Style.RESET_ALL}{line[end:]}"
+        if subtext:
+            # display part of the text, centered around the start of the value
+            line = Util.subtext(line, self.value_start + len(line) - len(self.line), ML_HUNK)
+            # put style reset at the end as a fallback - the cut may drop a closing code
+            return f"{line}{Style.RESET_ALL}"
+        # show whole line
+        return line
diff --git a/credsweeper/file_handler/diff_content_provider.py b/credsweeper/file_handler/diff_content_provider.py
index f4dc1cb93..9669cdbde 100644
--- a/credsweeper/file_handler/diff_content_provider.py
+++ b/credsweeper/file_handler/diff_content_provider.py
@@ -31,7 +31,7 @@ def __init__(
             file_path: str,  #
             change_type: DiffRowType,  #
             diff: List[DiffDict]) -> None:
-        super().__init__(file_path=file_path, info=change_type.value)
+        super().__init__(file_path=file_path, info=f"{file_path}:{change_type.value}")
         self.change_type = change_type
         self.diff = diff
 
diff --git a/docs/source/guide.rst b/docs/source/guide.rst
index ebebbc67f..62c41d7bf 100644
--- a/docs/source/guide.rst
+++ b/docs/source/guide.rst
@@ -18,7 +18,7 @@ Get all argument list:
                                  [--find-by-ext] [--depth POSITIVE_INT] [--no-filters] [--doc] [--ml_threshold FLOAT_OR_STR]
                                  [--ml_batch_size POSITIVE_INT] [--ml_config PATH] [--ml_model PATH] [--ml_providers STR]
                                  [--api_validation] [--jobs POSITIVE_INT] [--skip_ignored] [--save-json [PATH]]
-                                 [--save-xlsx [PATH]] [--hashed] [--subtext] [--sort] [--log LOG_LEVEL]
+                                 [--save-xlsx [PATH]] [--color] [--hashed] [--subtext] [--sort] [--log LOG_LEVEL]
                                  [--size_limit SIZE_LIMIT] [--banner] [--version]
 
     options:
@@ -54,6 +54,7 @@ Get all argument list:
       --skip_ignored        parse .gitignore files and skip credentials from ignored objects
       --save-json [PATH]    save result to json file (default: output.json)
       --save-xlsx [PATH]    save result to xlsx file (default: output.xlsx)
+      --color, -C           print results with colorization
       --hashed              line, variable, value will be hashed in output
       --subtext             line text will be stripped in 160 symbols but value and variable are kept
       --sort                enable output sorting
diff --git a/pyproject.toml b/pyproject.toml
index f7adfec16..9898d0fcf 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -7,6 +7,7 @@ authors = [
 dependencies = [
     "base58",
     "beautifulsoup4>=4.11.0",
+    "colorama",
     "cryptography",
     "GitPython",
     "google_auth_oauthlib",
diff --git a/requirements.txt b/requirements.txt
index a503eb483..cc23fc162 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -8,6 +8,7 @@ hatchling==1.26.3
 # Common requirements
 base58==2.1.1
 beautifulsoup4==4.12.3
+colorama==0.4.6
 cryptography==43.0.3
 GitPython==3.1.43
 google-auth-oauthlib==1.2.1
diff --git a/tests/test_app.py b/tests/test_app.py
index 1f1654936..b5b4c153c 100644
--- a/tests/test_app.py
+++ b/tests/test_app.py
@@ -212,6 +212,20 @@ def test_it_works_with_api_p(self) -> None:
 
     # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
 
+    def test_it_works_with_patch_color_p(self) -> None:
+        target_path = str(SAMPLES_PATH / "password.patch")
+        _stdout, _stderr = self._m_credsweeper(["--diff_path", target_path, "--log", "silence", "--color"])
+        output = " ".join(_stdout.split()[:-1])
+        expected = """
+                   \x1b[1mPassword .changes/1.16.98.json:added:3\x1b[0m
+                   "\x1b[94mpassword\x1b[0m"\x1b[92m:\x1b[0m "\x1b[93mdkajco1\x1b[0m"
+                   Added File Credentials: 1 Deleted File Credentials: 0 Time Elapsed:
+                   """
+        expected = " ".join(expected.split())
+        self.assertEqual(expected, output)
+
+    # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
+
     def test_it_works_n(self) -> None:
         _stdout, _stderr = self._m_credsweeper([])
 
@@ -243,6 +257,7 @@ def test_it_works_n(self) -> None:
                    " [--skip_ignored]" \
                    " [--save-json [PATH]]" \
                    " [--save-xlsx [PATH]]" \
+                   " [--color]" \
                    " [--hashed]" \
                    " [--subtext]" \
                    " [--sort]" \
diff --git a/tests/test_main.py b/tests/test_main.py
index 7cfb7b25c..a077e0187 100644
--- a/tests/test_main.py
+++ b/tests/test_main.py
@@ -163,6 +163,7 @@ def test_main_path_p(self, mock_get_arguments) -> None:
                                   diff_path=[str(target_path)],
                                   json_filename=os.path.join(tmp_dir, f"{__name__}.json"),
                                   xlsx_filename=None,
+                                  color=False,
                                   subtext=False,
                                   hashed=False,
                                   rule_path=None,
@@ -450,7 +451,7 @@ def test_tar_n(self) -> None:
 
     def test_aws_multi_p(self) -> None:
         content_provider: AbstractProvider = FilesProvider([SAMPLES_PATH / "aws_multi.md"])
-        cred_sweeper = CredSweeper(ml_threshold=0)
+        cred_sweeper = CredSweeper(ml_threshold=0, color=True, hashed=True)
         cred_sweeper.run(content_provider=content_provider)
         for i in cred_sweeper.credential_manager.get_credentials():
             if "AWS Multi" == i.rule_name:
@@ -609,7 +610,7 @@ def test_yaml_n(self) -> None:
     def test_encoded_p(self) -> None:
         # test for finding credentials in ENCODED data
         content_provider: AbstractProvider = FilesProvider([SAMPLES_PATH / "encoded_data"])
-        cred_sweeper = CredSweeper(depth=5, ml_threshold=0)
+        cred_sweeper = CredSweeper(depth=5, ml_threshold=0, color=True)
         cred_sweeper.run(content_provider=content_provider)
         found_credentials = cred_sweeper.credential_manager.get_credentials()
         self.assertEqual(2, len(found_credentials))