diff --git a/credsweeper/deep_scanner/xlsx_scanner.py b/credsweeper/deep_scanner/xlsx_scanner.py index f14b91001..a4f40f336 100644 --- a/credsweeper/deep_scanner/xlsx_scanner.py +++ b/credsweeper/deep_scanner/xlsx_scanner.py @@ -25,11 +25,10 @@ def data_scan( candidates = [] try: book = pd.read_excel(io.BytesIO(data_provider.data), sheet_name=None, header=None) - sheet_lines = [] for sheet_name, sheet_data in book.items(): - text = sheet_data.fillna('').astype(str) - for i in text.values: - sheet_lines.append('\t'.join(i)) + # replace open xml carriage returns _x000D_ before line feed only + df = sheet_data.replace(to_replace="_x000D_\n", value='\n', regex=True).fillna('').astype(str) + sheet_lines = ['\t'.join(x) for x in df.values] string_data_provider = StringContentProvider(lines=sheet_lines, file_path=data_provider.file_path, file_type=data_provider.file_type, diff --git a/credsweeper/filters/value_discord_bot_check.py b/credsweeper/filters/value_discord_bot_check.py index 583630058..b53f5c3a5 100644 --- a/credsweeper/filters/value_discord_bot_check.py +++ b/credsweeper/filters/value_discord_bot_check.py @@ -1,9 +1,11 @@ import contextlib +from credsweeper.common.constants import Chars from credsweeper.config import Config from credsweeper.credentials import LineData from credsweeper.file_handler.analysis_target import AnalysisTarget from credsweeper.filters import Filter +from credsweeper.filters.value_entropy_base64_check import ValueEntropyBase64Check from credsweeper.utils import Util @@ -28,6 +30,10 @@ def run(self, line_data: LineData, target: AnalysisTarget) -> bool: # . must be in value according regex dot_separator_index = line_data.value.index('.') id_part = line_data.value[:dot_separator_index] - if int(Util.decode_base64(id_part, padding_safe=True, urlsafe_detect=True)): + discord_id = int(Util.decode_base64(id_part, padding_safe=True, urlsafe_detect=True)) + entropy_part = line_data.value[dot_separator_index:] + entropy = Util.get_shannon_entropy(entropy_part, Chars.BASE64STD_CHARS.value) + min_entropy = ValueEntropyBase64Check.get_min_data_entropy(len(line_data.value)) + if not 1000 > discord_id and not min_entropy < entropy: return False return True diff --git a/tests/__init__.py b/tests/__init__.py index 1f8d81222..9a87e8d7e 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -17,10 +17,10 @@ SAMPLES_POST_CRED_COUNT = SAMPLES_CRED_COUNT - ML_FILTERED # with option --doc -SAMPLES_IN_DOC = 453 +SAMPLES_IN_DOC = 463 # archived credentials that are not found without --depth -SAMPLES_IN_DEEP_1 = SAMPLES_POST_CRED_COUNT + 29 +SAMPLES_IN_DEEP_1 = SAMPLES_POST_CRED_COUNT + 33 SAMPLES_IN_DEEP_2 = SAMPLES_IN_DEEP_1 + 54 SAMPLES_IN_DEEP_3 = SAMPLES_IN_DEEP_2 + 1 diff --git a/tests/data/depth_3.json b/tests/data/depth_3.json index e2a9c9433..f2cbed01f 100644 --- a/tests/data/depth_3.json +++ b/tests/data/depth_3.json @@ -472,168 +472,6 @@ } ] }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "NOT_AVAILABLE", - "ml_probability": null, - "rule": "AWS Client ID", - "severity": "high", - "confidence": "moderate", - "line_data_list": [ - { - "line": "AKIAGIREOGIODT1X4BT7", - "line_num": 2, - "path": "./tests/samples/aws_id.ods", - "info": "./tests/samples/aws_id.ods|ZIP|content.xml|RAW", - "value": "AKIAGIREOGIODT1X4BT7", - "value_start": 3882, - "value_end": 3902, - "variable": null, - "variable_start": -2, - "variable_end": -2, - "entropy_validation": { - "iterator": "BASE64_CHARS", - "entropy": 3.6841837197791887, - "valid": false - } - } - ] - }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "NOT_AVAILABLE", - "ml_probability": null, - "rule": "UUID", - "severity": "info", - "confidence": "strong", - "line_data_list": [ - { - "line": " Click to edit the title text format<footer><number><date/time>", - "line_num": 2, - "path": "./tests/samples/aws_id.pptx", - "info": "./tests/samples/aws_id.pptx|ZIP|ppt/slideMasters/slideMaster1.xml|RAW", - "value": "1B26FE4F-8819-409F-9556-40447A77EBF2", - "value_start": 3868, - "value_end": 3904, - "variable": null, - "variable_start": -2, - "variable_end": -2, - "entropy_validation": { - "iterator": "HEX_CHARS", - "entropy": 3.342171793538618, - "valid": true - } - } - ] - }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "NOT_AVAILABLE", - "ml_probability": null, - "rule": "UUID", - "severity": "info", - "confidence": "strong", - "line_data_list": [ - { - "line": " Footer<#>", - "line_num": 2, - "path": "./tests/samples/aws_id.pptx", - "info": "./tests/samples/aws_id.pptx|ZIP|ppt/slideLayouts/slideLayout1.xml|RAW", - "value": "42F61B0C-09B2-455B-8854-E1D3A3979B74", - "value_start": 2610, - "value_end": 2646, - "variable": null, - "variable_start": -2, - "variable_end": -2, - "entropy_validation": { - "iterator": "HEX_CHARS", - "entropy": 3.5535506956063068, - "valid": true - } - } - ] - }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "NOT_AVAILABLE", - "ml_probability": null, - "rule": "AWS Client ID", - "severity": "high", - "confidence": "moderate", - "line_data_list": [ - { - "line": " Follow the white rabbitAKIAGIREOGIPPTX1Y45X", - "line_num": 2, - "path": "./tests/samples/aws_id.pptx", - "info": "./tests/samples/aws_id.pptx|ZIP|ppt/slides/slide1.xml|RAW", - "value": "AKIAGIREOGIPPTX1Y45X", - "value_start": 2403, - "value_end": 2423, - "variable": null, - "variable_start": -2, - "variable_end": -2, - "entropy_validation": { - "iterator": "BASE64_CHARS", - "entropy": 3.6841837197791887, - "valid": false - } - } - ] - }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "NOT_AVAILABLE", - "ml_probability": null, - "rule": "UUID", - "severity": "info", - "confidence": "strong", - "line_data_list": [ - { - "line": "", - "line_num": 2, - "path": "./tests/samples/aws_id.xlsx", - "info": "./tests/samples/aws_id.xlsx|ZIP|xl/workbook.xml|RAW", - "value": "7626C862-2A13-11E5-B345-FEFF819CDC9F", - "value_start": 714, - "value_end": 750, - "variable": null, - "variable_start": -2, - "variable_end": -2, - "entropy_validation": { - "iterator": "HEX_CHARS", - "entropy": 3.4770260427684323, - "valid": true - } - } - ] - }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "NOT_AVAILABLE", - "ml_probability": null, - "rule": "AWS Client ID", - "severity": "high", - "confidence": "moderate", - "line_data_list": [ - { - "line": "AKIAGIREOGIAXLSX4BT5", - "line_num": 2, - "path": "./tests/samples/aws_id.xlsx", - "info": "./tests/samples/aws_id.xlsx|ZIP|xl/sharedStrings.xml|RAW", - "value": "AKIAGIREOGIAXLSX4BT5", - "value_start": 125, - "value_end": 145, - "variable": null, - "variable_start": -2, - "variable_end": -2, - "entropy_validation": { - "iterator": "BASE64_CHARS", - "entropy": 3.6464393446710153, - "valid": false - } - } - ] - }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "NOT_AVAILABLE", @@ -11186,6 +11024,60 @@ } ] }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "AWS Client ID", + "severity": "high", + "confidence": "moderate", + "line_data_list": [ + { + "line": "AKIAGIREOGIAXLSX4BT5userhostloginpassword\ud64d\uae38\ub3d9\uc804192.168.0.1adminH0NgGi1d0nGroot127.0.0.1rootiMr00TA1 password is w3Ry5tR0nGA2 ID:master,PW:dipPr10Gg!B3 192.168.0.1 master/NBd@126t!\uc8fc\uc778 FNAT-CC0TG_old10.53.51.17192.168.101.96377710.53.51.17192.168.101.9 63777 \uc8fc\uc778 FNAT-CC0TG_oldpassword:\u25a1 \ubb38\uc758 \ub0b4\uc6a9 : \u203b Error Stack Trace\ub3c4 \ud568\uaed8 \ucca8\ubd80 \ubd80\ud0c1\ub4dc\ub9bd\ub2c8\ub2e4.12345F16 224.52.124.93 root/A0dM1Nka", + "line_num": 2, + "path": "./tests/samples/sample.ods", + "info": "./tests/samples/sample.ods|ZIP|content.xml|RAW", + "value": "AKIAGIREOGIAXLSX4BT5", + "value_start": 7621, + "value_end": 7641, + "variable": null, + "variable_start": -2, + "variable_end": -2, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 3.6464393446710153, + "valid": false + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "VALIDATED_KEY", + "ml_probability": 0.989, + "rule": "Password", + "severity": "medium", + "confidence": "moderate", + "line_data_list": [ + { + "line": "AKIAGIREOGIAXLSX4BT5userhostloginpassword\ud64d\uae38\ub3d9\uc804192.168.0.1adminH0NgGi1d0nGroot127.0.0.1rootiMr00TA1 password is w3Ry5tR0nGA2 ID:master,PW:dipPr10Gg!B3 192.168.0.1 master/NBd@126t!\uc8fc\uc778 FNAT-CC0TG_old10.53.51.17192.168.101.96377710.53.51.17192.168.101.9 63777 \uc8fc\uc778 FNAT-CC0TG_oldpassword:\u25a1 \ubb38\uc758 \ub0b4\uc6a9 : \u203b Error Stack Trace\ub3c4 \ud568\uaed8 \ucca8\ubd80 \ubd80\ud0c1\ub4dc\ub9bd\ub2c8\ub2e4.12345F16 224.52.124.93 root/A0dM1Nka", + "line_num": 2, + "path": "./tests/samples/sample.ods", + "info": "./tests/samples/sample.ods|ZIP|content.xml|HTML", + "value": "dipPr10Gg!B3", + "value_start": 136, + "value_end": 148, + "variable": "PW", + "variable_start": 133, + "variable_end": 135, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 3.2862156256610597, + "valid": false + } + } + ] + }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "NOT_AVAILABLE", @@ -11294,6 +11186,87 @@ } ] }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "UUID", + "severity": "info", + "confidence": "strong", + "line_data_list": [ + { + "line": " Click to edit the title text format<footer><number><date/time>", + "line_num": 2, + "path": "./tests/samples/sample.pptx", + "info": "./tests/samples/sample.pptx|ZIP|ppt/slideMasters/slideMaster1.xml|RAW", + "value": "1B26FE4F-8819-409F-9556-40447A77EBF2", + "value_start": 3868, + "value_end": 3904, + "variable": null, + "variable_start": -2, + "variable_end": -2, + "entropy_validation": { + "iterator": "HEX_CHARS", + "entropy": 3.342171793538618, + "valid": true + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "UUID", + "severity": "info", + "confidence": "strong", + "line_data_list": [ + { + "line": " Footer<#>", + "line_num": 2, + "path": "./tests/samples/sample.pptx", + "info": "./tests/samples/sample.pptx|ZIP|ppt/slideLayouts/slideLayout1.xml|RAW", + "value": "42F61B0C-09B2-455B-8854-E1D3A3979B74", + "value_start": 2610, + "value_end": 2646, + "variable": null, + "variable_start": -2, + "variable_end": -2, + "entropy_validation": { + "iterator": "HEX_CHARS", + "entropy": 3.5535506956063068, + "valid": true + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "AWS Client ID", + "severity": "high", + "confidence": "moderate", + "line_data_list": [ + { + "line": " Follow the white rabbitAKIAGIREOGIPPTX1Y45X", + "line_num": 2, + "path": "./tests/samples/sample.pptx", + "info": "./tests/samples/sample.pptx|ZIP|ppt/slides/slide1.xml|RAW", + "value": "AKIAGIREOGIPPTX1Y45X", + "value_start": 2403, + "value_end": 2423, + "variable": null, + "variable_start": -2, + "variable_end": -2, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 3.6841837197791887, + "valid": false + } + } + ] + }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", @@ -11321,6 +11294,141 @@ } ] }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "VALIDATED_KEY", + "ml_probability": 0.886, + "rule": "Password", + "severity": "medium", + "confidence": "moderate", + "line_data_list": [ + { + "line": "AKIAGIREOGIAXLSX4BT5userhostloginpassword \ud64d\uae38\ub3d9\uc804192.168.0.1adminH0NgGi1d0nGroot127.0.0.1iMr00TA1 password is w3Ry5tR0nGA2 ID:master,PW:dipPr10Gg!B3 192.168.0.1 master/NBd@126t! \uc8fc\uc778 FNAT-CC0TG_old10.53.51.17192.168.101.9 63777password: \u25a1 \ubb38\uc758 \ub0b4\uc6a9 : \u203b Error Stack Trace\ub3c4 \ud568\uaed8 \ucca8\ubd80 \ubd80\ud0c1\ub4dc\ub9bd\ub2c8\ub2e4. 12345F16 224.52.124.93 root/A0dM1Nka", + "line_num": 2, + "path": "./tests/samples/sample.xlsx", + "info": "./tests/samples/sample.xlsx|ZIP|xl/sharedStrings.xml|RAW", + "value": " ", + "value_start": 1163, + "value_end": 1167, + "variable": "password", + "variable_start": 1154, + "variable_end": 1162, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 1.0, + "valid": false + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "UUID", + "severity": "info", + "confidence": "strong", + "line_data_list": [ + { + "line": "", + "line_num": 2, + "path": "./tests/samples/sample.xlsx", + "info": "./tests/samples/sample.xlsx|ZIP|xl/workbook.xml|RAW", + "value": "7626C862-2A13-11E5-B345-FEFF819CDC9F", + "value_start": 1015, + "value_end": 1051, + "variable": null, + "variable_start": -2, + "variable_end": -2, + "entropy_validation": { + "iterator": "HEX_CHARS", + "entropy": 3.4770260427684323, + "valid": true + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "AWS Client ID", + "severity": "high", + "confidence": "moderate", + "line_data_list": [ + { + "line": "AKIAGIREOGIAXLSX4BT5userhostloginpassword \ud64d\uae38\ub3d9\uc804192.168.0.1adminH0NgGi1d0nGroot127.0.0.1iMr00TA1 password is w3Ry5tR0nGA2 ID:master,PW:dipPr10Gg!B3 192.168.0.1 master/NBd@126t! \uc8fc\uc778 FNAT-CC0TG_old10.53.51.17192.168.101.9 63777password: \u25a1 \ubb38\uc758 \ub0b4\uc6a9 : \u203b Error Stack Trace\ub3c4 \ud568\uaed8 \ucca8\ubd80 \ubd80\ud0c1\ub4dc\ub9bd\ub2c8\ub2e4. 12345F16 224.52.124.93 root/A0dM1Nka", + "line_num": 2, + "path": "./tests/samples/sample.xlsx", + "info": "./tests/samples/sample.xlsx|ZIP|xl/sharedStrings.xml|RAW", + "value": "AKIAGIREOGIAXLSX4BT5", + "value_start": 127, + "value_end": 147, + "variable": null, + "variable_start": -2, + "variable_end": -2, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 3.6464393446710153, + "valid": false + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "VALIDATED_KEY", + "ml_probability": 0.899, + "rule": "Password", + "severity": "medium", + "confidence": "moderate", + "line_data_list": [ + { + "line": "AKIAGIREOGIAXLSX4BT5userhostloginpassword \ud64d\uae38\ub3d9\uc804192.168.0.1adminH0NgGi1d0nGroot127.0.0.1iMr00TA1 password is w3Ry5tR0nGA2 ID:master,PW:dipPr10Gg!B3 192.168.0.1 master/NBd@126t! \uc8fc\uc778 FNAT-CC0TG_old10.53.51.17192.168.101.9 63777password: \u25a1 \ubb38\uc758 \ub0b4\uc6a9 : \u203b Error Stack Trace\ub3c4 \ud568\uaed8 \ucca8\ubd80 \ubd80\ud0c1\ub4dc\ub9bd\ub2c8\ub2e4. 12345F16 224.52.124.93 root/A0dM1Nka", + "line_num": 2, + "path": "./tests/samples/sample.xlsx", + "info": "./tests/samples/sample.xlsx|ZIP|xl/sharedStrings.xml|RAW", + "value": "dipPr10Gg!