diff --git a/credsweeper/deep_scanner/xlsx_scanner.py b/credsweeper/deep_scanner/xlsx_scanner.py
index f14b91001..a4f40f336 100644
--- a/credsweeper/deep_scanner/xlsx_scanner.py
+++ b/credsweeper/deep_scanner/xlsx_scanner.py
@@ -25,11 +25,10 @@ def data_scan(
candidates = []
try:
book = pd.read_excel(io.BytesIO(data_provider.data), sheet_name=None, header=None)
- sheet_lines = []
for sheet_name, sheet_data in book.items():
- text = sheet_data.fillna('').astype(str)
- for i in text.values:
- sheet_lines.append('\t'.join(i))
+ # replace open xml carriage returns _x000D_ before line feed only
+ df = sheet_data.replace(to_replace="_x000D_\n", value='\n', regex=True).fillna('').astype(str)
+ sheet_lines = ['\t'.join(x) for x in df.values]
string_data_provider = StringContentProvider(lines=sheet_lines,
file_path=data_provider.file_path,
file_type=data_provider.file_type,
diff --git a/credsweeper/filters/value_discord_bot_check.py b/credsweeper/filters/value_discord_bot_check.py
index 583630058..b53f5c3a5 100644
--- a/credsweeper/filters/value_discord_bot_check.py
+++ b/credsweeper/filters/value_discord_bot_check.py
@@ -1,9 +1,11 @@
import contextlib
+from credsweeper.common.constants import Chars
from credsweeper.config import Config
from credsweeper.credentials import LineData
from credsweeper.file_handler.analysis_target import AnalysisTarget
from credsweeper.filters import Filter
+from credsweeper.filters.value_entropy_base64_check import ValueEntropyBase64Check
from credsweeper.utils import Util
@@ -28,6 +30,10 @@ def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
# . must be in value according regex
dot_separator_index = line_data.value.index('.')
id_part = line_data.value[:dot_separator_index]
- if int(Util.decode_base64(id_part, padding_safe=True, urlsafe_detect=True)):
+ discord_id = int(Util.decode_base64(id_part, padding_safe=True, urlsafe_detect=True))
+ entropy_part = line_data.value[dot_separator_index:]
+ entropy = Util.get_shannon_entropy(entropy_part, Chars.BASE64STD_CHARS.value)
+ min_entropy = ValueEntropyBase64Check.get_min_data_entropy(len(entropy_part))
+ if discord_id >= 1000 and min_entropy < entropy:  # plausible id and random-looking tail
return False
return True
diff --git a/tests/__init__.py b/tests/__init__.py
index 1f8d81222..9a87e8d7e 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -17,10 +17,10 @@
SAMPLES_POST_CRED_COUNT = SAMPLES_CRED_COUNT - ML_FILTERED
# with option --doc
-SAMPLES_IN_DOC = 453
+SAMPLES_IN_DOC = 463
# archived credentials that are not found without --depth
-SAMPLES_IN_DEEP_1 = SAMPLES_POST_CRED_COUNT + 29
+SAMPLES_IN_DEEP_1 = SAMPLES_POST_CRED_COUNT + 33
SAMPLES_IN_DEEP_2 = SAMPLES_IN_DEEP_1 + 54
SAMPLES_IN_DEEP_3 = SAMPLES_IN_DEEP_2 + 1
diff --git a/tests/data/depth_3.json b/tests/data/depth_3.json
index e2a9c9433..f2cbed01f 100644
--- a/tests/data/depth_3.json
+++ b/tests/data/depth_3.json
@@ -472,168 +472,6 @@
}
]
},
- {
- "api_validation": "NOT_AVAILABLE",
- "ml_validation": "NOT_AVAILABLE",
- "ml_probability": null,
- "rule": "AWS Client ID",
- "severity": "high",
- "confidence": "moderate",
- "line_data_list": [
- {
- "line": "AKIAGIREOGIODT1X4BT7",
- "line_num": 2,
- "path": "./tests/samples/aws_id.ods",
- "info": "./tests/samples/aws_id.ods|ZIP|content.xml|RAW",
- "value": "AKIAGIREOGIODT1X4BT7",
- "value_start": 3882,
- "value_end": 3902,
- "variable": null,
- "variable_start": -2,
- "variable_end": -2,
- "entropy_validation": {
- "iterator": "BASE64_CHARS",
- "entropy": 3.6841837197791887,
- "valid": false
- }
- }
- ]
- },
- {
- "api_validation": "NOT_AVAILABLE",
- "ml_validation": "NOT_AVAILABLE",
- "ml_probability": null,
- "rule": "UUID",
- "severity": "info",
- "confidence": "strong",
- "line_data_list": [
- {
- "line": " Click to edit the title text format<footer><number><date/time>",
- "line_num": 2,
- "path": "./tests/samples/aws_id.pptx",
- "info": "./tests/samples/aws_id.pptx|ZIP|ppt/slideMasters/slideMaster1.xml|RAW",
- "value": "1B26FE4F-8819-409F-9556-40447A77EBF2",
- "value_start": 3868,
- "value_end": 3904,
- "variable": null,
- "variable_start": -2,
- "variable_end": -2,
- "entropy_validation": {
- "iterator": "HEX_CHARS",
- "entropy": 3.342171793538618,
- "valid": true
- }
- }
- ]
- },
- {
- "api_validation": "NOT_AVAILABLE",
- "ml_validation": "NOT_AVAILABLE",
- "ml_probability": null,
- "rule": "UUID",
- "severity": "info",
- "confidence": "strong",
- "line_data_list": [
- {
- "line": " Footer<#>",
- "line_num": 2,
- "path": "./tests/samples/aws_id.pptx",
- "info": "./tests/samples/aws_id.pptx|ZIP|ppt/slideLayouts/slideLayout1.xml|RAW",
- "value": "42F61B0C-09B2-455B-8854-E1D3A3979B74",
- "value_start": 2610,
- "value_end": 2646,
- "variable": null,
- "variable_start": -2,
- "variable_end": -2,
- "entropy_validation": {
- "iterator": "HEX_CHARS",
- "entropy": 3.5535506956063068,
- "valid": true
- }
- }
- ]
- },
- {
- "api_validation": "NOT_AVAILABLE",
- "ml_validation": "NOT_AVAILABLE",
- "ml_probability": null,
- "rule": "AWS Client ID",
- "severity": "high",
- "confidence": "moderate",
- "line_data_list": [
- {
- "line": " Follow the white rabbitAKIAGIREOGIPPTX1Y45X",
- "line_num": 2,
- "path": "./tests/samples/aws_id.pptx",
- "info": "./tests/samples/aws_id.pptx|ZIP|ppt/slides/slide1.xml|RAW",
- "value": "AKIAGIREOGIPPTX1Y45X",
- "value_start": 2403,
- "value_end": 2423,
- "variable": null,
- "variable_start": -2,
- "variable_end": -2,
- "entropy_validation": {
- "iterator": "BASE64_CHARS",
- "entropy": 3.6841837197791887,
- "valid": false
- }
- }
- ]
- },
- {
- "api_validation": "NOT_AVAILABLE",
- "ml_validation": "NOT_AVAILABLE",
- "ml_probability": null,
- "rule": "UUID",
- "severity": "info",
- "confidence": "strong",
- "line_data_list": [
- {
- "line": "",
- "line_num": 2,
- "path": "./tests/samples/aws_id.xlsx",
- "info": "./tests/samples/aws_id.xlsx|ZIP|xl/workbook.xml|RAW",
- "value": "7626C862-2A13-11E5-B345-FEFF819CDC9F",
- "value_start": 714,
- "value_end": 750,
- "variable": null,
- "variable_start": -2,
- "variable_end": -2,
- "entropy_validation": {
- "iterator": "HEX_CHARS",
- "entropy": 3.4770260427684323,
- "valid": true
- }
- }
- ]
- },
- {
- "api_validation": "NOT_AVAILABLE",
- "ml_validation": "NOT_AVAILABLE",
- "ml_probability": null,
- "rule": "AWS Client ID",
- "severity": "high",
- "confidence": "moderate",
- "line_data_list": [
- {
- "line": "AKIAGIREOGIAXLSX4BT5",
- "line_num": 2,
- "path": "./tests/samples/aws_id.xlsx",
- "info": "./tests/samples/aws_id.xlsx|ZIP|xl/sharedStrings.xml|RAW",
- "value": "AKIAGIREOGIAXLSX4BT5",
- "value_start": 125,
- "value_end": 145,
- "variable": null,
- "variable_start": -2,
- "variable_end": -2,
- "entropy_validation": {
- "iterator": "BASE64_CHARS",
- "entropy": 3.6464393446710153,
- "valid": false
- }
- }
- ]
- },
{
"api_validation": "NOT_AVAILABLE",
"ml_validation": "NOT_AVAILABLE",
@@ -11186,6 +11024,60 @@
}
]
},
+ {
+ "api_validation": "NOT_AVAILABLE",
+ "ml_validation": "NOT_AVAILABLE",
+ "ml_probability": null,
+ "rule": "AWS Client ID",
+ "severity": "high",
+ "confidence": "moderate",
+ "line_data_list": [
+ {
+ "line": "AKIAGIREOGIAXLSX4BT5userhostloginpassword\ud64d\uae38\ub3d9\uc804192.168.0.1adminH0NgGi1d0nGroot127.0.0.1rootiMr00TA1 password is w3Ry5tR0nGA2 ID:master,PW:dipPr10Gg!B3 192.168.0.1 master/NBd@126t!\uc8fc\uc778 FNAT-CC0TG_old10.53.51.17192.168.101.96377710.53.51.17192.168.101.9 63777 \uc8fc\uc778 FNAT-CC0TG_oldpassword:\u25a1 \ubb38\uc758 \ub0b4\uc6a9 : \u203b Error Stack Trace\ub3c4 \ud568\uaed8 \ucca8\ubd80 \ubd80\ud0c1\ub4dc\ub9bd\ub2c8\ub2e4.12345F16 224.52.124.93 root/A0dM1Nka",
+ "line_num": 2,
+ "path": "./tests/samples/sample.ods",
+ "info": "./tests/samples/sample.ods|ZIP|content.xml|RAW",
+ "value": "AKIAGIREOGIAXLSX4BT5",
+ "value_start": 7621,
+ "value_end": 7641,
+ "variable": null,
+ "variable_start": -2,
+ "variable_end": -2,
+ "entropy_validation": {
+ "iterator": "BASE64_CHARS",
+ "entropy": 3.6464393446710153,
+ "valid": false
+ }
+ }
+ ]
+ },
+ {
+ "api_validation": "NOT_AVAILABLE",
+ "ml_validation": "VALIDATED_KEY",
+ "ml_probability": 0.989,
+ "rule": "Password",
+ "severity": "medium",
+ "confidence": "moderate",
+ "line_data_list": [
+ {
+ "line": "AKIAGIREOGIAXLSX4BT5userhostloginpassword\ud64d\uae38\ub3d9\uc804192.168.0.1adminH0NgGi1d0nGroot127.0.0.1rootiMr00TA1 password is w3Ry5tR0nGA2 ID:master,PW:dipPr10Gg!B3 192.168.0.1 master/NBd@126t!\uc8fc\uc778 FNAT-CC0TG_old10.53.51.17192.168.101.96377710.53.51.17192.168.101.9 63777 \uc8fc\uc778 FNAT-CC0TG_oldpassword:\u25a1 \ubb38\uc758 \ub0b4\uc6a9 : \u203b Error Stack Trace\ub3c4 \ud568\uaed8 \ucca8\ubd80 \ubd80\ud0c1\ub4dc\ub9bd\ub2c8\ub2e4.12345F16 224.52.124.93 root/A0dM1Nka",
+ "line_num": 2,
+ "path": "./tests/samples/sample.ods",
+ "info": "./tests/samples/sample.ods|ZIP|content.xml|HTML",
+ "value": "dipPr10Gg!B3",
+ "value_start": 136,
+ "value_end": 148,
+ "variable": "PW",
+ "variable_start": 133,
+ "variable_end": 135,
+ "entropy_validation": {
+ "iterator": "BASE64_CHARS",
+ "entropy": 3.2862156256610597,
+ "valid": false
+ }
+ }
+ ]
+ },
{
"api_validation": "NOT_AVAILABLE",
"ml_validation": "NOT_AVAILABLE",
@@ -11294,6 +11186,87 @@
}
]
},
+ {
+ "api_validation": "NOT_AVAILABLE",
+ "ml_validation": "NOT_AVAILABLE",
+ "ml_probability": null,
+ "rule": "UUID",
+ "severity": "info",
+ "confidence": "strong",
+ "line_data_list": [
+ {
+ "line": " Click to edit the title text format<footer><number><date/time>",
+ "line_num": 2,
+ "path": "./tests/samples/sample.pptx",
+ "info": "./tests/samples/sample.pptx|ZIP|ppt/slideMasters/slideMaster1.xml|RAW",
+ "value": "1B26FE4F-8819-409F-9556-40447A77EBF2",
+ "value_start": 3868,
+ "value_end": 3904,
+ "variable": null,
+ "variable_start": -2,
+ "variable_end": -2,
+ "entropy_validation": {
+ "iterator": "HEX_CHARS",
+ "entropy": 3.342171793538618,
+ "valid": true
+ }
+ }
+ ]
+ },
+ {
+ "api_validation": "NOT_AVAILABLE",
+ "ml_validation": "NOT_AVAILABLE",
+ "ml_probability": null,
+ "rule": "UUID",
+ "severity": "info",
+ "confidence": "strong",
+ "line_data_list": [
+ {
+ "line": " Footer<#>",
+ "line_num": 2,
+ "path": "./tests/samples/sample.pptx",
+ "info": "./tests/samples/sample.pptx|ZIP|ppt/slideLayouts/slideLayout1.xml|RAW",
+ "value": "42F61B0C-09B2-455B-8854-E1D3A3979B74",
+ "value_start": 2610,
+ "value_end": 2646,
+ "variable": null,
+ "variable_start": -2,
+ "variable_end": -2,
+ "entropy_validation": {
+ "iterator": "HEX_CHARS",
+ "entropy": 3.5535506956063068,
+ "valid": true
+ }
+ }
+ ]
+ },
+ {
+ "api_validation": "NOT_AVAILABLE",
+ "ml_validation": "NOT_AVAILABLE",
+ "ml_probability": null,
+ "rule": "AWS Client ID",
+ "severity": "high",
+ "confidence": "moderate",
+ "line_data_list": [
+ {
+ "line": " Follow the white rabbitAKIAGIREOGIPPTX1Y45X",
+ "line_num": 2,
+ "path": "./tests/samples/sample.pptx",
+ "info": "./tests/samples/sample.pptx|ZIP|ppt/slides/slide1.xml|RAW",
+ "value": "AKIAGIREOGIPPTX1Y45X",
+ "value_start": 2403,
+ "value_end": 2423,
+ "variable": null,
+ "variable_start": -2,
+ "variable_end": -2,
+ "entropy_validation": {
+ "iterator": "BASE64_CHARS",
+ "entropy": 3.6841837197791887,
+ "valid": false
+ }
+ }
+ ]
+ },
{
"api_validation": "NOT_AVAILABLE",
"ml_validation": "VALIDATED_KEY",
@@ -11321,6 +11294,141 @@
}
]
},
+ {
+ "api_validation": "NOT_AVAILABLE",
+ "ml_validation": "VALIDATED_KEY",
+ "ml_probability": 0.886,
+ "rule": "Password",
+ "severity": "medium",
+ "confidence": "moderate",
+ "line_data_list": [
+ {
+ "line": "AKIAGIREOGIAXLSX4BT5userhostloginpassword \ud64d\uae38\ub3d9\uc804192.168.0.1adminH0NgGi1d0nGroot127.0.0.1iMr00TA1 password is w3Ry5tR0nGA2 ID:master,PW:dipPr10Gg!B3 192.168.0.1 master/NBd@126t! \uc8fc\uc778 FNAT-CC0TG_old10.53.51.17192.168.101.9 63777password:
\u25a1 \ubb38\uc758 \ub0b4\uc6a9 :
\u203b Error Stack Trace\ub3c4 \ud568\uaed8 \ucca8\ubd80 \ubd80\ud0c1\ub4dc\ub9bd\ub2c8\ub2e4.
12345F16 224.52.124.93 root/A0dM1Nka",
+ "line_num": 2,
+ "path": "./tests/samples/sample.xlsx",
+ "info": "./tests/samples/sample.xlsx|ZIP|xl/sharedStrings.xml|RAW",
+ "value": "
",
+ "value_start": 1163,
+ "value_end": 1167,
+ "variable": "password",
+ "variable_start": 1154,
+ "variable_end": 1162,
+ "entropy_validation": {
+ "iterator": "BASE64_CHARS",
+ "entropy": 1.0,
+ "valid": false
+ }
+ }
+ ]
+ },
+ {
+ "api_validation": "NOT_AVAILABLE",
+ "ml_validation": "NOT_AVAILABLE",
+ "ml_probability": null,
+ "rule": "UUID",
+ "severity": "info",
+ "confidence": "strong",
+ "line_data_list": [
+ {
+ "line": "Sheet4x3!$A$1:$D$4",
+ "line_num": 2,
+ "path": "./tests/samples/sample.xlsx",
+ "info": "./tests/samples/sample.xlsx|ZIP|xl/workbook.xml|RAW",
+ "value": "7626C862-2A13-11E5-B345-FEFF819CDC9F",
+ "value_start": 1015,
+ "value_end": 1051,
+ "variable": null,
+ "variable_start": -2,
+ "variable_end": -2,
+ "entropy_validation": {
+ "iterator": "HEX_CHARS",
+ "entropy": 3.4770260427684323,
+ "valid": true
+ }
+ }
+ ]
+ },
+ {
+ "api_validation": "NOT_AVAILABLE",
+ "ml_validation": "NOT_AVAILABLE",
+ "ml_probability": null,
+ "rule": "AWS Client ID",
+ "severity": "high",
+ "confidence": "moderate",
+ "line_data_list": [
+ {
+ "line": "AKIAGIREOGIAXLSX4BT5userhostloginpassword \ud64d\uae38\ub3d9\uc804192.168.0.1adminH0NgGi1d0nGroot127.0.0.1iMr00TA1 password is w3Ry5tR0nGA2 ID:master,PW:dipPr10Gg!B3 192.168.0.1 master/NBd@126t! \uc8fc\uc778 FNAT-CC0TG_old10.53.51.17192.168.101.9 63777password:
\u25a1 \ubb38\uc758 \ub0b4\uc6a9 :
\u203b Error Stack Trace\ub3c4 \ud568\uaed8 \ucca8\ubd80 \ubd80\ud0c1\ub4dc\ub9bd\ub2c8\ub2e4.
12345F16 224.52.124.93 root/A0dM1Nka",
+ "line_num": 2,
+ "path": "./tests/samples/sample.xlsx",
+ "info": "./tests/samples/sample.xlsx|ZIP|xl/sharedStrings.xml|RAW",
+ "value": "AKIAGIREOGIAXLSX4BT5",
+ "value_start": 127,
+ "value_end": 147,
+ "variable": null,
+ "variable_start": -2,
+ "variable_end": -2,
+ "entropy_validation": {
+ "iterator": "BASE64_CHARS",
+ "entropy": 3.6464393446710153,
+ "valid": false
+ }
+ }
+ ]
+ },
+ {
+ "api_validation": "NOT_AVAILABLE",
+ "ml_validation": "VALIDATED_KEY",
+ "ml_probability": 0.899,
+ "rule": "Password",
+ "severity": "medium",
+ "confidence": "moderate",
+ "line_data_list": [
+ {
+ "line": "AKIAGIREOGIAXLSX4BT5userhostloginpassword \ud64d\uae38\ub3d9\uc804192.168.0.1adminH0NgGi1d0nGroot127.0.0.1iMr00TA1 password is w3Ry5tR0nGA2 ID:master,PW:dipPr10Gg!B3 192.168.0.1 master/NBd@126t! \uc8fc\uc778 FNAT-CC0TG_old10.53.51.17192.168.101.9 63777password:
\u25a1 \ubb38\uc758 \ub0b4\uc6a9 :
\u203b Error Stack Trace\ub3c4 \ud568\uaed8 \ucca8\ubd80 \ubd80\ud0c1\ub4dc\ub9bd\ub2c8\ub2e4.
12345F16 224.52.124.93 root/A0dM1Nka",
+ "line_num": 2,
+ "path": "./tests/samples/sample.xlsx",
+ "info": "./tests/samples/sample.xlsx|ZIP|xl/sharedStrings.xml|RAW",
+ "value": "dipPr10Gg!