From 3938c7dda74f1767cee3973d214ad84af770c3f8 Mon Sep 17 00:00:00 2001 From: Roman Babenko Date: Fri, 13 Dec 2024 10:20:53 +0200 Subject: [PATCH] Credential colorization in stdout export (#632) * output with colorization * missed types lib, import optimization, extend test * fix * fix * fix --- credsweeper/__main__.py | 2 + credsweeper/app.py | 13 +++++++ credsweeper/credentials/line_data.py | 37 +++++++++++++++++++ credsweeper/deep_scanner/gzip_scanner.py | 2 +- .../file_handler/diff_content_provider.py | 2 +- credsweeper/filters/value_camel_case_check.py | 2 +- credsweeper/filters/value_file_path_check.py | 2 +- credsweeper/ml_model/features/__init__.py | 2 +- credsweeper/ml_model/ml_validator.py | 2 +- docs/source/guide.rst | 3 +- pyproject.toml | 1 + requirements.txt | 2 + tests/test_app.py | 15 ++++++++ tests/test_main.py | 5 ++- 14 files changed, 81 insertions(+), 9 deletions(-) diff --git a/credsweeper/__main__.py b/credsweeper/__main__.py index f025e5d51..b607a40a5 100644 --- a/credsweeper/__main__.py +++ b/credsweeper/__main__.py @@ -224,6 +224,7 @@ def get_arguments() -> Namespace: const="output.xlsx", dest="xlsx_filename", metavar="PATH") + parser.add_argument("--color", "-C", help="print results with colorization", action="store_const", const=True) parser.add_argument("--hashed", help="line, variable, value will be hashed in output", action="store_const", @@ -299,6 +300,7 @@ def scan(args: Namespace, content_provider: AbstractProvider, json_filename: Opt api_validation=args.api_validation, json_filename=json_filename, xlsx_filename=xlsx_filename, + color=args.color, hashed=args.hashed, subtext=args.subtext, sort_output=args.sort_output, diff --git a/credsweeper/app.py b/credsweeper/app.py index f60b28394..dfa8f3782 100644 --- a/credsweeper/app.py +++ b/credsweeper/app.py @@ -5,6 +5,7 @@ from typing import Any, List, Optional, Union, Dict, Sequence, Tuple import pandas as pd +from colorama import Style # Directory of credsweeper sources MUST be placed before imports to avoid circular import error APP_PATH = Path(__file__).resolve().parent @@ -42,6 +43,7 @@ def __init__(self, api_validation: bool = False, json_filename: Union[None, str, Path] = None, xlsx_filename: Union[None, str, Path] = None, + color: bool = False, hashed: bool = False, subtext: bool = False, sort_output: bool = False, @@ -73,6 +75,7 @@ def __init__(self, to json xlsx_filename: optional string variable, path to save result to xlsx + color: print results to stdout with colorization hashed: use hash of line, value and variable instead plain text subtext: use subtext of line near variable-value like it performed in ML use_filters: boolean variable, specifying the need of rule filters @@ -112,6 +115,7 @@ def __init__(self, self.credential_manager = CredentialManager() self.json_filename: Union[None, str, Path] = json_filename self.xlsx_filename: Union[None, str, Path] = xlsx_filename + self.color = color self.hashed = hashed self.subtext = subtext self.sort_output = sort_output @@ -427,6 +431,15 @@ def export_results(self) -> None: df = pd.DataFrame(data=data_list) df.to_excel(self.xlsx_filename, index=False) + if self.color: + is_exported = True + for credential in credentials: + for line_data in credential.line_data_list: + # bright rule name and path or info + print(Style.BRIGHT + credential.rule_name + + f" {line_data.info or line_data.path}:{line_data.line_num}" + Style.RESET_ALL) + print(line_data.get_colored_line(hashed=self.hashed, subtext=self.subtext)) + if is_exported is False: for credential in credentials: print(credential.to_str(hashed=self.hashed, subtext=self.subtext)) diff --git a/credsweeper/credentials/line_data.py b/credsweeper/credentials/line_data.py index 92f801484..7fba85e5d 100644 --- a/credsweeper/credentials/line_data.py +++ b/credsweeper/credentials/line_data.py @@ -5,6 +5,8 @@ from functools import cached_property from typing import Any, Dict, Optional, Tuple +from colorama import Fore, Style + from credsweeper.common.constants import MAX_LINE_LENGTH, UTF_8, StartEnd, ML_HUNK from credsweeper.config import Config from credsweeper.utils import Util @@ -414,3 +416,38 @@ def to_json(self, hashed: bool, subtext: bool) -> Dict: } reported_output = {k: v for k, v in full_output.items() if k in self.config.line_data_output} return reported_output + + def get_colored_line(self, hashed: bool, subtext: bool = False) -> str: + """Represents the LineData with a value, separator, and variable color formatting""" + if hashed: + # return colored hash + return Fore.LIGHTGREEN_EX \ + + self.get_hash_or_subtext(self.line, hashed, + StartEnd(self.value_start, self.value_end) if subtext else None) \ + + Style.RESET_ALL + # at least, value must present + line = self.line[:self.value_start] \ + + Fore.LIGHTYELLOW_EX \ + + self.line[self.value_start:self.value_end] \ + + Style.RESET_ALL \ + + self.line[self.value_end:] # noqa: E127 + # separator may be missing + if 0 <= self.separator_start < self.separator_end <= self.value_start: + line = line[:self.separator_start] \ + + Fore.LIGHTGREEN_EX \ + + line[self.separator_start:self.separator_end] \ + + Style.RESET_ALL \ + + line[self.separator_end:] + # variable may be missing + if 0 <= self.separator_start \ + and 0 <= self.variable_start < self.variable_end <= self.separator_end <= self.value_start \ + or 0 <= self.variable_start < self.variable_end <= self.value_start: + line = line[:self.variable_start] \ + + Fore.LIGHTBLUE_EX \ + + line[self.variable_start:self.variable_end] \ + + Style.RESET_ALL \ + + line[self.variable_end:] + if subtext: + # display part of the text, centered around the start of the value, style reset at the end as a fallback + line = f"{Util.subtext(line, self.value_start + len(line) - len(self.line), ML_HUNK)}{Style.RESET_ALL}" + return line diff --git a/credsweeper/deep_scanner/gzip_scanner.py b/credsweeper/deep_scanner/gzip_scanner.py index 06e2321ca..1f8ec39ee 100644 --- a/credsweeper/deep_scanner/gzip_scanner.py +++ b/credsweeper/deep_scanner/gzip_scanner.py @@ -5,10 +5,10 @@ from pathlib import Path from typing import List -from credsweeper.utils import Util from credsweeper.credentials import Candidate from credsweeper.deep_scanner.abstract_scanner import AbstractScanner from credsweeper.file_handler.data_content_provider import DataContentProvider +from credsweeper.utils import Util logger = logging.getLogger(__name__) diff --git a/credsweeper/file_handler/diff_content_provider.py b/credsweeper/file_handler/diff_content_provider.py index f4dc1cb93..9669cdbde 100644 --- a/credsweeper/file_handler/diff_content_provider.py +++ b/credsweeper/file_handler/diff_content_provider.py @@ -31,7 +31,7 @@ def __init__( file_path: str, # change_type: DiffRowType, # diff: List[DiffDict]) -> None: - super().__init__(file_path=file_path, info=change_type.value) + super().__init__(file_path=file_path, info=f"{file_path}:{change_type.value}") self.change_type = change_type self.diff = diff diff --git a/credsweeper/filters/value_camel_case_check.py b/credsweeper/filters/value_camel_case_check.py index 0fad1df90..7faa7d15e 100644 --- a/credsweeper/filters/value_camel_case_check.py +++ b/credsweeper/filters/value_camel_case_check.py @@ -1,7 +1,7 @@ import re -from credsweeper.config import Config from credsweeper.common import static_keyword_checklist +from credsweeper.config import Config from credsweeper.credentials import LineData from credsweeper.file_handler.analysis_target import AnalysisTarget from credsweeper.filters import Filter diff --git a/credsweeper/filters/value_file_path_check.py b/credsweeper/filters/value_file_path_check.py index b871547dd..81bd3dae4 100644 --- a/credsweeper/filters/value_file_path_check.py +++ b/credsweeper/filters/value_file_path_check.py @@ -1,5 +1,5 @@ -from credsweeper.common.constants import Chars from credsweeper.common import static_keyword_checklist +from credsweeper.common.constants import Chars from credsweeper.config import Config from credsweeper.credentials import LineData from credsweeper.file_handler.analysis_target import AnalysisTarget diff --git a/credsweeper/ml_model/features/__init__.py b/credsweeper/ml_model/features/__init__.py index 95b705eca..480c3ecb4 100644 --- a/credsweeper/ml_model/features/__init__.py +++ b/credsweeper/ml_model/features/__init__.py @@ -3,9 +3,9 @@ from credsweeper.ml_model.features.hartley_entropy import HartleyEntropy from credsweeper.ml_model.features.has_html_tag import HasHtmlTag from credsweeper.ml_model.features.is_secret_numeric import IsSecretNumeric -from credsweeper.ml_model.features.search_in_attribute import SearchInAttribute from credsweeper.ml_model.features.reny_entropy import RenyiEntropy from credsweeper.ml_model.features.rule_name import RuleName +from credsweeper.ml_model.features.search_in_attribute import SearchInAttribute from credsweeper.ml_model.features.shannon_entropy import ShannonEntropy from credsweeper.ml_model.features.word_in_line import WordInLine from credsweeper.ml_model.features.word_in_path import WordInPath diff --git a/credsweeper/ml_model/ml_validator.py b/credsweeper/ml_model/ml_validator.py index 589a4bb60..1617111b1 100644 --- a/credsweeper/ml_model/ml_validator.py +++ b/credsweeper/ml_model/ml_validator.py @@ -7,9 +7,9 @@ import numpy as np import onnxruntime as ort +import credsweeper.ml_model.features as features from credsweeper.common.constants import ThresholdPreset, ML_HUNK from credsweeper.credentials import Candidate, CandidateKey -import credsweeper.ml_model.features as features from credsweeper.utils import Util logger = logging.getLogger(__name__) diff --git a/docs/source/guide.rst b/docs/source/guide.rst index ebebbc67f..62c41d7bf 100644 --- a/docs/source/guide.rst +++ b/docs/source/guide.rst @@ -18,7 +18,7 @@ Get all argument list: [--find-by-ext] [--depth POSITIVE_INT] [--no-filters] [--doc] [--ml_threshold FLOAT_OR_STR] [--ml_batch_size POSITIVE_INT] [--ml_config PATH] [--ml_model PATH] [--ml_providers STR] [--api_validation] [--jobs POSITIVE_INT] [--skip_ignored] [--save-json [PATH]] - [--save-xlsx [PATH]] [--hashed] [--subtext] [--sort] [--log LOG_LEVEL] + [--save-xlsx [PATH]] [--color] [--hashed] [--subtext] [--sort] [--log LOG_LEVEL] [--size_limit SIZE_LIMIT] [--banner] [--version] options: @@ -54,6 +54,7 @@ Get all argument list: --skip_ignored parse .gitignore files and skip credentials from ignored objects --save-json [PATH] save result to json file (default: output.json) --save-xlsx [PATH] save result to xlsx file (default: output.xlsx) + --color, -C print results with colorization --hashed line, variable, value will be hashed in output --subtext line text will be stripped in 160 symbols but value and variable are kept --sort enable output sorting diff --git a/pyproject.toml b/pyproject.toml index f7adfec16..9898d0fcf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,6 +7,7 @@ authors = [ dependencies = [ "base58", "beautifulsoup4>=4.11.0", + "colorama", "cryptography", "GitPython", "google_auth_oauthlib", diff --git a/requirements.txt b/requirements.txt index a503eb483..379b364f8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,6 +8,7 @@ hatchling==1.26.3 # Common requirements base58==2.1.1 beautifulsoup4==4.12.3 +colorama==0.4.6 cryptography==43.0.3 GitPython==3.1.43 google-auth-oauthlib==1.2.1 @@ -45,6 +46,7 @@ pytest-cov pytest-html pytest-random-order types-beautifulsoup4 +types-colorama types-PyYAML types-requests types-oauthlib diff --git a/tests/test_app.py b/tests/test_app.py index 1f1654936..b5b4c153c 100644 --- a/tests/test_app.py +++ b/tests/test_app.py @@ -212,6 +212,20 @@ def test_it_works_with_api_p(self) -> None: # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # + def test_it_works_with_patch_color_p(self) -> None: + target_path = str(SAMPLES_PATH / "password.patch") + _stdout, _stderr = self._m_credsweeper(["--diff_path", target_path, "--log", "silence", "--color"]) + output = " ".join(_stdout.split()[:-1]) + expected = """ + \x1b[1mPassword .changes/1.16.98.json:added:3\x1b[0m + "\x1b[94mpassword\x1b[0m"\x1b[92m:\x1b[0m "\x1b[93mdkajco1\x1b[0m" + Added File Credentials: 1 Deleted File Credentials: 0 Time Elapsed: + """ + expected = " ".join(expected.split()) + self.assertEqual(expected, output) + + # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # + def test_it_works_n(self) -> None: _stdout, _stderr = self._m_credsweeper([]) @@ -243,6 +257,7 @@ def test_it_works_n(self) -> None: " [--skip_ignored]" \ " [--save-json [PATH]]" \ " [--save-xlsx [PATH]]" \ + " [--color]" \ " [--hashed]" \ " [--subtext]" \ " [--sort]" \ diff --git a/tests/test_main.py b/tests/test_main.py index 7cfb7b25c..8961f59ee 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -163,6 +163,7 @@ def test_main_path_p(self, mock_get_arguments) -> None: diff_path=[str(target_path)], json_filename=os.path.join(tmp_dir, f"{__name__}.json"), xlsx_filename=None, + color=False, subtext=False, hashed=False, rule_path=None, @@ -450,7 +451,7 @@ def test_tar_n(self) -> None: def test_aws_multi_p(self) -> None: content_provider: AbstractProvider = FilesProvider([SAMPLES_PATH / "aws_multi.md"]) - cred_sweeper = CredSweeper(ml_threshold=0) + cred_sweeper = CredSweeper(ml_threshold=0, color=True, hashed=True) cred_sweeper.run(content_provider=content_provider) for i in cred_sweeper.credential_manager.get_credentials(): if "AWS Multi" == i.rule_name: @@ -609,7 +610,7 @@ def test_yaml_n(self) -> None: def test_encoded_p(self) -> None: # test for finding credentials in ENCODED data content_provider: AbstractProvider = FilesProvider([SAMPLES_PATH / "encoded_data"]) - cred_sweeper = CredSweeper(depth=5, ml_threshold=0) + cred_sweeper = CredSweeper(depth=5, ml_threshold=0, color=True, subtext=True) cred_sweeper.run(content_provider=content_provider) found_credentials = cred_sweeper.credential_manager.get_credentials() self.assertEqual(2, len(found_credentials))