Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Credential colorization in stdout export #632

Merged
merged 5 commits into from
Dec 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions credsweeper/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,7 @@ def get_arguments() -> Namespace:
const="output.xlsx",
dest="xlsx_filename",
metavar="PATH")
parser.add_argument("--color", "-C", help="print results with colorization", action="store_const", const=True)
parser.add_argument("--hashed",
help="line, variable, value will be hashed in output",
action="store_const",
Expand Down Expand Up @@ -299,6 +300,7 @@ def scan(args: Namespace, content_provider: AbstractProvider, json_filename: Opt
api_validation=args.api_validation,
json_filename=json_filename,
xlsx_filename=xlsx_filename,
color=args.color,
hashed=args.hashed,
subtext=args.subtext,
sort_output=args.sort_output,
Expand Down
13 changes: 13 additions & 0 deletions credsweeper/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from typing import Any, List, Optional, Union, Dict, Sequence, Tuple

import pandas as pd
from colorama import Style

# Directory of credsweeper sources MUST be placed before imports to avoid circular import error
APP_PATH = Path(__file__).resolve().parent
Expand Down Expand Up @@ -42,6 +43,7 @@ def __init__(self,
api_validation: bool = False,
json_filename: Union[None, str, Path] = None,
xlsx_filename: Union[None, str, Path] = None,
color: bool = False,
hashed: bool = False,
subtext: bool = False,
sort_output: bool = False,
Expand Down Expand Up @@ -73,6 +75,7 @@ def __init__(self,
to json
xlsx_filename: optional string variable, path to save result
to xlsx
color: print results to stdout with colorization
hashed: use hash of line, value and variable instead plain text
subtext: use subtext of line near variable-value like it performed in ML
use_filters: boolean variable, specifying the need of rule filters
Expand Down Expand Up @@ -112,6 +115,7 @@ def __init__(self,
self.credential_manager = CredentialManager()
self.json_filename: Union[None, str, Path] = json_filename
self.xlsx_filename: Union[None, str, Path] = xlsx_filename
self.color = color
self.hashed = hashed
self.subtext = subtext
self.sort_output = sort_output
Expand Down Expand Up @@ -427,6 +431,15 @@ def export_results(self) -> None:
df = pd.DataFrame(data=data_list)
df.to_excel(self.xlsx_filename, index=False)

if self.color:
is_exported = True
for credential in credentials:
for line_data in credential.line_data_list:
# bright rule name and path or info
print(Style.BRIGHT + credential.rule_name +
f" {line_data.info or line_data.path}:{line_data.line_num}" + Style.RESET_ALL)
print(line_data.get_colored_line(hashed=self.hashed, subtext=self.subtext))

if is_exported is False:
for credential in credentials:
print(credential.to_str(hashed=self.hashed, subtext=self.subtext))
37 changes: 37 additions & 0 deletions credsweeper/credentials/line_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
from functools import cached_property
from typing import Any, Dict, Optional, Tuple

from colorama import Fore, Style

from credsweeper.common.constants import MAX_LINE_LENGTH, UTF_8, StartEnd, ML_HUNK
from credsweeper.config import Config
from credsweeper.utils import Util
Expand Down Expand Up @@ -414,3 +416,38 @@ def to_json(self, hashed: bool, subtext: bool) -> Dict:
}
reported_output = {k: v for k, v in full_output.items() if k in self.config.line_data_output}
return reported_output

def get_colored_line(self, hashed: bool, subtext: bool = False) -> str:
    """Return the line with ANSI colors applied to the value, separator and variable.

    Args:
        hashed: when True the result of ``get_hash_or_subtext`` is returned, painted with
            a single color, and no individual span is highlighted
        subtext: when True only a part of the text around the value is returned

    Return:
        text with escape sequences, which is ready to be printed to a terminal

    """
    if hashed:
        # separate spans cannot be highlighted in this mode, so the whole text gets one color
        value_span = StartEnd(self.value_start, self.value_end) if subtext else None
        return Fore.LIGHTGREEN_EX \
            + self.get_hash_or_subtext(self.line, hashed, value_span) \
            + Style.RESET_ALL
    # at least, value must present
    line = self.line[:self.value_start] \
        + Fore.LIGHTYELLOW_EX \
        + self.line[self.value_start:self.value_end] \
        + Style.RESET_ALL \
        + self.line[self.value_end:]
    # number of escape symbols placed before the first symbol of the value;
    # the reset sequence after the value does not move the start of the value
    value_shift = len(Fore.LIGHTYELLOW_EX)
    # separator and variable may be missing; each one is accepted only when it ends before the value
    spans = []
    if 0 <= self.separator_start < self.separator_end <= self.value_start:
        spans.append((self.separator_start, self.separator_end, Fore.LIGHTGREEN_EX))
    if 0 <= self.variable_start < self.variable_end <= self.value_start:
        spans.append((self.variable_start, self.variable_end, Fore.LIGHTBLUE_EX))
    # Paint from right to left: the text on the left of an insertion keeps its original indices.
    # A span that reaches into an already painted area is skipped, otherwise its slice
    # would cut an escape sequence which has been inserted before.
    painted_from = self.value_start
    for start, end, color in sorted(spans, key=lambda span: span[0], reverse=True):
        if painted_from < end:
            continue
        line = line[:start] + color + line[start:end] + Style.RESET_ALL + line[end:]
        painted_from = start
        value_shift += len(color) + len(Style.RESET_ALL)
    if subtext:
        # display part of the text, centered around the start of the value, style reset at the end as a fallback
        # because the cut may drop a closing reset sequence
        line = f"{Util.subtext(line, self.value_start + value_shift, ML_HUNK)}{Style.RESET_ALL}"
    return line
2 changes: 1 addition & 1 deletion credsweeper/deep_scanner/gzip_scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@
from pathlib import Path
from typing import List

from credsweeper.utils import Util
from credsweeper.credentials import Candidate
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
from credsweeper.file_handler.data_content_provider import DataContentProvider
from credsweeper.utils import Util

logger = logging.getLogger(__name__)

Expand Down
2 changes: 1 addition & 1 deletion credsweeper/file_handler/diff_content_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def __init__(
file_path: str, #
change_type: DiffRowType, #
diff: List[DiffDict]) -> None:
super().__init__(file_path=file_path, info=change_type.value)
super().__init__(file_path=file_path, info=f"{file_path}:{change_type.value}")
self.change_type = change_type
self.diff = diff

Expand Down
2 changes: 1 addition & 1 deletion credsweeper/filters/value_camel_case_check.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import re

from credsweeper.config import Config
from credsweeper.common import static_keyword_checklist
from credsweeper.config import Config
from credsweeper.credentials import LineData
from credsweeper.file_handler.analysis_target import AnalysisTarget
from credsweeper.filters import Filter
Expand Down
2 changes: 1 addition & 1 deletion credsweeper/filters/value_file_path_check.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from credsweeper.common.constants import Chars
from credsweeper.common import static_keyword_checklist
from credsweeper.common.constants import Chars
from credsweeper.config import Config
from credsweeper.credentials import LineData
from credsweeper.file_handler.analysis_target import AnalysisTarget
Expand Down
2 changes: 1 addition & 1 deletion credsweeper/ml_model/features/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
from credsweeper.ml_model.features.hartley_entropy import HartleyEntropy
from credsweeper.ml_model.features.has_html_tag import HasHtmlTag
from credsweeper.ml_model.features.is_secret_numeric import IsSecretNumeric
from credsweeper.ml_model.features.search_in_attribute import SearchInAttribute
from credsweeper.ml_model.features.reny_entropy import RenyiEntropy
from credsweeper.ml_model.features.rule_name import RuleName
from credsweeper.ml_model.features.search_in_attribute import SearchInAttribute
from credsweeper.ml_model.features.shannon_entropy import ShannonEntropy
from credsweeper.ml_model.features.word_in_line import WordInLine
from credsweeper.ml_model.features.word_in_path import WordInPath
Expand Down
2 changes: 1 addition & 1 deletion credsweeper/ml_model/ml_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@
import numpy as np
import onnxruntime as ort

import credsweeper.ml_model.features as features
from credsweeper.common.constants import ThresholdPreset, ML_HUNK
from credsweeper.credentials import Candidate, CandidateKey
import credsweeper.ml_model.features as features
from credsweeper.utils import Util

logger = logging.getLogger(__name__)
Expand Down
3 changes: 2 additions & 1 deletion docs/source/guide.rst
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ Get all argument list:
[--find-by-ext] [--depth POSITIVE_INT] [--no-filters] [--doc] [--ml_threshold FLOAT_OR_STR]
[--ml_batch_size POSITIVE_INT] [--ml_config PATH] [--ml_model PATH] [--ml_providers STR]
[--api_validation] [--jobs POSITIVE_INT] [--skip_ignored] [--save-json [PATH]]
[--save-xlsx [PATH]] [--hashed] [--subtext] [--sort] [--log LOG_LEVEL]
[--save-xlsx [PATH]] [--color] [--hashed] [--subtext] [--sort] [--log LOG_LEVEL]
[--size_limit SIZE_LIMIT]
[--banner] [--version]
options:
Expand Down Expand Up @@ -54,6 +54,7 @@ Get all argument list:
--skip_ignored parse .gitignore files and skip credentials from ignored objects
--save-json [PATH] save result to json file (default: output.json)
--save-xlsx [PATH] save result to xlsx file (default: output.xlsx)
--color, -C print results with colorization
--hashed line, variable, value will be hashed in output
--subtext line text will be stripped in 160 symbols but value and variable are kept
--sort enable output sorting
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ authors = [
dependencies = [
"base58",
"beautifulsoup4>=4.11.0",
"colorama",
"cryptography",
"GitPython",
"google_auth_oauthlib",
Expand Down
2 changes: 2 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ hatchling==1.26.3
# Common requirements
base58==2.1.1
beautifulsoup4==4.12.3
colorama==0.4.6
cryptography==43.0.3
GitPython==3.1.43
google-auth-oauthlib==1.2.1
Expand Down Expand Up @@ -45,6 +46,7 @@ pytest-cov
pytest-html
pytest-random-order
types-beautifulsoup4
types-colorama
types-PyYAML
types-requests
types-oauthlib
Expand Down
15 changes: 15 additions & 0 deletions tests/test_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,20 @@ def test_it_works_with_api_p(self) -> None:

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

def test_it_works_with_patch_color_p(self) -> None:
    """Scan the sample patch with --color and compare the colorized stdout with the reference."""
    cli_args = ["--diff_path", str(SAMPLES_PATH / "password.patch"), "--log", "silence", "--color"]
    captured_out, _captured_err = self._m_credsweeper(cli_args)
    reference = """
\x1b[1mPassword .changes/1.16.98.json:added:3\x1b[0m
"\x1b[94mpassword\x1b[0m"\x1b[92m:\x1b[0m "\x1b[93mdkajco1\x1b[0m"
Added File Credentials: 1 Deleted File Credentials: 0 Time Elapsed:
"""
    # whitespace is normalized on both sides; the last token of stdout is dropped
    # because the reference text stops right after "Time Elapsed:"
    actual_tokens = captured_out.split()[:-1]
    reference_tokens = reference.split()
    self.assertEqual(" ".join(reference_tokens), " ".join(actual_tokens))

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

def test_it_works_n(self) -> None:
_stdout, _stderr = self._m_credsweeper([])

Expand Down Expand Up @@ -243,6 +257,7 @@ def test_it_works_n(self) -> None:
" [--skip_ignored]" \
" [--save-json [PATH]]" \
" [--save-xlsx [PATH]]" \
" [--color]" \
" [--hashed]" \
" [--subtext]" \
" [--sort]" \
Expand Down
5 changes: 3 additions & 2 deletions tests/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,7 @@ def test_main_path_p(self, mock_get_arguments) -> None:
diff_path=[str(target_path)],
json_filename=os.path.join(tmp_dir, f"{__name__}.json"),
xlsx_filename=None,
color=False,
subtext=False,
hashed=False,
rule_path=None,
Expand Down Expand Up @@ -450,7 +451,7 @@ def test_tar_n(self) -> None:

def test_aws_multi_p(self) -> None:
content_provider: AbstractProvider = FilesProvider([SAMPLES_PATH / "aws_multi.md"])
cred_sweeper = CredSweeper(ml_threshold=0)
cred_sweeper = CredSweeper(ml_threshold=0, color=True, hashed=True)
cred_sweeper.run(content_provider=content_provider)
for i in cred_sweeper.credential_manager.get_credentials():
if "AWS Multi" == i.rule_name:
Expand Down Expand Up @@ -609,7 +610,7 @@ def test_yaml_n(self) -> None:
def test_encoded_p(self) -> None:
# test for finding credentials in ENCODED data
content_provider: AbstractProvider = FilesProvider([SAMPLES_PATH / "encoded_data"])
cred_sweeper = CredSweeper(depth=5, ml_threshold=0)
cred_sweeper = CredSweeper(depth=5, ml_threshold=0, color=True, subtext=True)
cred_sweeper.run(content_provider=content_provider)
found_credentials = cred_sweeper.credential_manager.get_credentials()
self.assertEqual(2, len(found_credentials))
Expand Down
Loading