From 9edeba1a01b34658c42624340eec7ecaa65749a9 Mon Sep 17 00:00:00 2001
From: mbellifa
Date: Thu, 9 Jan 2025 17:25:42 -0700
Subject: [PATCH 01/10] Added index generation script and workflow hook

---
 .github/workflows/release.yml |  11 +-
 .scripts/generate_indexes.py  | 371 ++++++++++++++++++++++++++++++++++
 .scripts/requirements.txt     |   1 +
 indexes/README.md             |   1 +
 4 files changed, 383 insertions(+), 1 deletion(-)
 create mode 100644 .scripts/generate_indexes.py
 create mode 100644 .scripts/requirements.txt
 create mode 100644 indexes/README.md

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 1df3dd362..7830e31f6 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -4,7 +4,7 @@ on:
   workflow_dispatch:
 
 permissions:
-  contents: read
+  contents: read, write
 
 jobs:
   release:
@@ -32,6 +32,15 @@ jobs:
         uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b #v5.3.0
         with:
           python-version: "3.11"
+      - name: Install dependencies
+        run: pip3 install -r ./scripts/.requirements.txt
+      - name: Generate indexes
+        run: python3 ./scripts/generate_indexes.py
+      - name: Commit Indexes
+        run: |
+          git add ./indexes
+          git commit -m "Update indexes"
+          git push
       - name: Create new panther-analysis release
         run: |
           export AWS_REGION=${{ secrets.AWS_REGION }}
diff --git a/.scripts/generate_indexes.py b/.scripts/generate_indexes.py
new file mode 100644
index 000000000..0b82c74b5
--- /dev/null
+++ b/.scripts/generate_indexes.py
@@ -0,0 +1,371 @@
+"""
+Analyzes all YAML files in the panther-analysis directory and generates a
+detections-coverage.json file with markdown indexes in the indexes directory
+"""
+
+import collections, json, hashlib, pathlib, yaml, os, itertools
+
+
+def generate_indexes(directory):
+    """
+    Generates JSON and Markdown indexes; directory points to the root directory of the repo
+    """
+
+    detections = {}
+    query_lookup = {}  # Maps QueryNames (and RuleIDs) to their YAML
+    logtype_lookup = {
+        # Maps tableified names for all LogTypes, e.g. onepassword_signinattempt => OnePassword.SignInAttempt; this is used to extract "Log Types" from queries
+        'crowdstrike_aidmaster': 'Crowdstrike.AIDMaster',
+        'snowflake.account_usage': 'Snowflake.AccountUsage'
+    }
+
+    for root, subdirectories, files in os.walk(directory):
+        for file in files:
+            if '/rules' in root or '/policies' in root or '/queries' in root or '/simple_rules' in root or '/correlation_rules' in root:
+                if file[-4:] == '.yml':
+                    yaml_path = os.path.join(root, file)
+                    detection_name = file.replace('.yml', '')
+                    detection_yaml = ingest_yaml(yaml_path)
+                    if 'QueryName' in detection_yaml:
+                        query_lookup[detection_yaml['QueryName']] = detection_yaml
+                    if 'RuleID' in detection_yaml:
+                        query_lookup[detection_yaml['RuleID']] = detection_yaml
+                    if 'LogTypes' in detection_yaml:
+                        for log_type in detection_yaml['LogTypes']:
+                            logtype_lookup[log_type.lower().replace('.', '_')] = log_type
+                    detection_yaml = analyze_yaml(detection_yaml)
+                    if 'AnalysisType' not in detection_yaml:
+                        continue
+                    if detection_yaml['AnalysisType'] in ('datamodel', 'global', 'pack', 'lookup_table'):
+                        continue
+                    if detection_yaml['DisplayName'] == '' or 'deprecated' in detection_yaml['DisplayName'].lower():
+                        continue
+                    # May want to revisit this; filtering out signals
+                    if 'signal - ' in detection_yaml['DisplayName'].lower():
+                        continue
+                    # Filter out query names like Query.Snowflake.BruteForceByIp, which are often called by other rules
+                    if 'query.' in detection_yaml['DisplayName'].lower():
+                        continue
+                    if 'Description' not in detection_yaml or detection_yaml['Description'] == '':
+                        detection_yaml['Description'] = ''
+                    # We previously continued here to filter out blank descriptions, but now we only filter out the AWS CloudTrail two-minute count query that's not enabled by default
+                    if 'Enabled' not in detection_yaml or detection_yaml['Enabled'] == False:
+                        continue
+
+                    detections[detection_name] = detection_yaml
+                    detections[detection_name]['YAMLPath'] = yaml_path.replace(str(directory), '').strip('/')  # Relative path from root
+
+    print('Successfully analyzed ' + str(len(detections.keys())) + ' detections!')
+
+    save_website_json(detections, query_lookup, logtype_lookup, pathlib.Path(directory) / 'indexes' / 'detection-coverage.json')
+    write_alpha_index(detections, query_lookup, logtype_lookup, pathlib.Path(directory))
+
+
+def ingest_yaml(path):
+    with open(path) as file:
+        detection = yaml.full_load(file)
+    return detection
+
+
+def analyze_yaml(detection_yaml):
+    rv = {}
+    if 'Enabled' in detection_yaml:
+        rv["Enabled"] = detection_yaml["Enabled"]
+
+    if 'RuleID' in detection_yaml:
+        rv["Name"] = detection_yaml["RuleID"]
+    elif 'PolicyID' in detection_yaml:
+        rv["Name"] = detection_yaml["PolicyID"]
+    elif 'QueryName' in detection_yaml:
+        rv["Name"] = detection_yaml["QueryName"]
+
+    if 'AnalysisType' in detection_yaml:
+        rv["AnalysisType"] = detection_yaml["AnalysisType"]
+    display_name = ''
+    if 'QueryName' in detection_yaml:
+        display_name = detection_yaml["QueryName"]
+    elif 'DisplayName' in detection_yaml:
+        display_name = detection_yaml["DisplayName"]
+    if display_name == '' and 'RuleID' in detection_yaml:
+        display_name = detection_yaml["RuleID"]
+    display_name = display_name.replace('--', '~')
+    if display_name == '':
+        raise Exception("No display name found for " + repr(detection_yaml))
+    if display_name[0] == "'":  # If the whole name is in single quotes, remove them
+        display_name = display_name.strip("'")
+    rv["DisplayName"] = display_name
+
+    if 'ResourceTypes' in detection_yaml:
+        rv["LogTypes"] = detection_yaml["ResourceTypes"]
+    elif 'LogTypes' in detection_yaml:
+        rv["LogTypes"] = detection_yaml["LogTypes"]
+
+    if 'ScheduledQueries' in detection_yaml:
+        rv['ScheduledQueries'] = detection_yaml['ScheduledQueries']
+
+    if 'Query' in detection_yaml:
+        rv['Query'] = detection_yaml['Query']
+
+    if 'SnowflakeQuery' in detection_yaml:
+        rv['SnowflakeQuery'] = detection_yaml['SnowflakeQuery']
+
+    if 'Description' in detection_yaml:
+        # strip newlines from the end
+        rv["Description"] = detection_yaml["Description"].strip()
+        # strip newlines embedded in strings
+        rv["Description"] = rv["Description"].replace("\n", "").replace("''", "'")
+
+    if 'Tags' in detection_yaml:
+        rv["Tags"] = detection_yaml["Tags"]
+
+    if 'Detection' in detection_yaml:
+        rv['Detection'] = detection_yaml['Detection']
+
+    return rv
+
+
+# https://stackoverflow.com/a/44873382
+def sha256sum(filename):
+    h = hashlib.sha256()
+    b = bytearray(128 * 1024)
+    mv = memoryview(b)
+    with open(filename, 'rb', buffering=0) as f:
+        while n := f.readinto(mv):
+            h.update(mv[:n])
+    return h.hexdigest()
+
+
+def extract_logtypes_from_sql(sql, logtype_lookup):
+    logtypes = []
+    sql = sql.lower()
+    for db_name, log_type in logtype_lookup.items():
+        db_name = db_name.lower()
+        if db_name in sql and log_type not in logtypes:
+            logtypes.append(log_type)
+    return logtypes
+
+
+def extract_log_types_from_yaml(yaml, query_lookup, logtype_lookup):
+    if 'LogTypes' in yaml:
+        return yaml['LogTypes']
+    if 'ScheduledQueries' in yaml:
+        for query in yaml['ScheduledQueries']:
+            if query in query_lookup:
+                query_yaml = query_lookup[query]
+                sql = ''
+                if 'Query' in query_yaml:
+                    sql = query_yaml['Query']
+                elif 'SnowflakeQuery' in query_yaml:
+                    sql = query_yaml['SnowflakeQuery']
+                return extract_logtypes_from_sql(sql, logtype_lookup)
+
+    if 'Query' in yaml:
+        return extract_logtypes_from_sql(yaml['Query'], logtype_lookup)
+    if 'SnowflakeQuery' in yaml:
+        return extract_logtypes_from_sql(yaml['SnowflakeQuery'], logtype_lookup)
+    if yaml['AnalysisType'] == 'correlation_rule' and 'Detection' in yaml:
+        log_types = []
+        for detection in yaml['Detection']:
+            if 'Sequence' in detection:
+                for seq in detection['Sequence']:
+                    if 'RuleID' not in seq:
+                        continue
+                    rule_yaml = query_lookup[seq['RuleID']]
+                    extracted_log_types = extract_log_types_from_yaml(rule_yaml, query_lookup, logtype_lookup)
+                    if extracted_log_types is None:
+                        print('*** ERROR ***')
+                        print(repr(rule_yaml))
+
+                    for log_type in extracted_log_types:
+                        if log_type not in log_types:
+                            log_types.append(log_type)
+            if 'Group' in detection:
+                for group in detection['Group']:
+                    if 'RuleID' not in group:
+                        continue
+                    rule_yaml = query_lookup[group['RuleID']]
+                    extracted_log_types = extract_log_types_from_yaml(rule_yaml, query_lookup, logtype_lookup)
+                    if extracted_log_types is None:
+                        print('*** ERROR ***')
+                        print(repr(rule_yaml))
+
+                    for log_type in extracted_log_types:
+                        if log_type not in log_types:
+                            log_types.append(log_type)
+        return log_types
+
+
+# We use this to prefer showing the Scheduled Rules over their associated Query when they share the same name
+def entry_scoring(entry):
+    score = 0
+    if entry['AnalysisType'] == 'Scheduled Query':
+        score += 3
+    if entry['AnalysisType'] == 'Scheduled Rule':
+        score += 2
+    if entry['AnalysisType'] == 'Rule':
+        score += 1
+    return score
+
+def group_by(iterable, key=None):
+    # Produces a dictionary keyed by the key function; the input is sorted first because itertools.groupby only groups consecutive items
+    if key is None:
+        key = lambda x: x
+    result = {}
+    groups = itertools.groupby(sorted(iterable, key=key), key=key)
+    for k, g in groups:
+        result[k] = list(g)
+    return result
+
+
+def save_website_json(detections, query_lookup, logtype_lookup, json_path):
+    json_export = []
+    detection_types = collections.Counter()
+
+    for d in detections.values():
+        json_slice = {key: d[key] for key in
+                      d.keys() & {'DisplayName', 'LogTypes', 'Description', 'AnalysisType', 'YAMLPath'}}
+        # Clean up analysis type, e.g. rule -> Rule and scheduled_rule -> Scheduled Rule
+        json_slice['AnalysisType'] = ' '.join([x.capitalize() for x in json_slice['AnalysisType'].split('_')])
+        if 'LogTypes' not in d or len(d['LogTypes']) == 0:
+            print("Extracting")
+            print(json_slice)
+            json_slice['LogTypes'] = extract_log_types_from_yaml(d, query_lookup, logtype_lookup)
+            print(json_slice)
+
+        detection_types[json_slice['AnalysisType']] += 1
+        if 'LogTypes' in json_slice:
+            print(json_slice)
+            json_slice['LogTypes'].sort()
+        json_export.append(json_slice)
+    name_map = {}
+    for x in json_export:
+        name = x['DisplayName'].lower()
+        if name not in name_map:
+            name_map[name] = []
+        name_map[name].append(x)
+    json_export = []
+    for name in name_map:
+        name_map[name] = list(sorted(name_map[name], key=entry_scoring, reverse=True))
+        json_export.append(name_map[name][0])
+
+    json_export = list(sorted(json_export, key=lambda x: x['DisplayName'].lower()))
+    print(f"Writing {json_path}...")
+    with open(json_path, 'w') as fp:
+        json.dump(json_export, fp, sort_keys=True)
+    print(f"Total: {sum(detection_types.values())}")
+
+# Splits the first part of a log type off to form a heading
+def logtype_to_pretty(log_type):
+    log_type_split = log_type.split('.')
+    aliases = {
+        'Amazon.EKS': 'AWS EKS',
+        'Gravitational.Teleport': 'Teleport',
+        'GSuite': 'Google Workspace'
+    }
+    if log_type_split[0] == 'AWS':
+        return f"{log_type_split[0]} {log_type_split[1]}"
+    for alias, pretty in aliases.items():
+        if alias in log_type:
+            return pretty
+    return log_type_split[0]
+
+
+def write_alpha_index(detections, query_lookup, logtype_lookup, root_dir):
+    # Map each detection to each of its log types, then write an alphabetic index of all log types
+    logtype_mapping = {}
+    valid_detections = []
+    for d in detections.values():
+        json_slice = {key: d[key] for key in
+                      d.keys() & {'DisplayName', 'LogTypes', 'Description', 'AnalysisType', 'YAMLPath'}}
+
+        if 'LogTypes' not in d or len(d['LogTypes']) == 0:
+            json_slice['LogTypes'] = extract_log_types_from_yaml(d, query_lookup, logtype_lookup)
+        valid_detections.append(json_slice)
+
+    # Dedupe detections by DisplayName
+    name_map = group_by(valid_detections, key=lambda x: x['DisplayName'].lower())
+    standard_rules = []
+    json_export = []
+    for name in name_map:
+        name_map[name] = list(sorted(name_map[name], key=entry_scoring, reverse=True))
+        winner = name_map[name][0]
+
+        headings = set(map(logtype_to_pretty, winner['LogTypes']))
+        for log_type in headings:
+            if log_type not in logtype_mapping:
+                logtype_mapping[log_type] = []
+            logtype_mapping[log_type].append(winner)
+        if 'standard_rules' in winner['YAMLPath']:
+            winner['Headings'] = headings
+            standard_rules.append(winner)
+        json_export.append(name_map[name][0])
+
+    output = "# Alpha Index\n\n"
+    letter_buckets = group_by(sorted(logtype_mapping.keys()), key=lambda x: x[0].upper())
+    letters = sorted(letter_buckets.keys())
+    #log_type.replace('.', '').replace(' ', '-').lower()
+    for letter in letters:
+        output += f"- [{letter}](#{letter})\n"
+
+    for letter in letters:
+        output += f"# {letter}\n\n"
+        for log_type in sorted(letter_buckets[letter]):
+            output += f"- [{log_type}](#{log_type.replace('.', '').replace(' ', '-').lower()})\n"
+        output += "\n\n"
+        for log_type in sorted(letter_buckets[letter]):
+            output += f"## {log_type}\n\n"
+            logtype_mapping[log_type] = sorted(logtype_mapping[log_type], key=lambda x: x['DisplayName'].lower())
+            for detection in logtype_mapping[log_type]:
+                output += f"- [{detection['DisplayName']}](../{detection['YAMLPath']})\n"
+        output += "\n\n"
+
+    with open(root_dir / 'indexes' / 'alpha-index.md', 'w') as fp:
+        fp.write(output)
+
+    index_files = {
+        'aws': ['AWS'],
+        'gcp': ['GCP'],
+        'github': ['GitHub'],
+        'gworkspace': ['Google Workspace'],
+        'okta': ['Okta'],
+        'onelogin': ['OneLogin'],
+        'onepass': ['OnePassword'],
+        'osquery': ['Osquery'],
+        'saas': ['Box', 'Dropbox', 'Google Workspace', 'Microsoft 365', 'Okta', 'OneLogin', 'Salesforce', 'Slack', 'Teleport', 'Zoom', 'Zendesk'],
+        'snowflake': ['Snowflake'],
+    }
+    all_log_types = sorted(logtype_mapping.keys())
+    for index_file, log_types in index_files.items():
+        output = ""
+
+        for log_type in all_log_types:
+            if not any([log_type.startswith(prefix) for prefix in log_types]):
+                continue
+            output += f"## {log_type}\n\n"
+            logtype_mapping[log_type] = sorted(logtype_mapping[log_type], key=lambda x: x['DisplayName'].lower())
+            for detection in logtype_mapping[log_type]:
+                output += f"- [{detection['DisplayName']}](../{detection['YAMLPath']})\n"
+            output += "\n\n"
+        with open(root_dir / 'indexes' / f'{index_file}.md', 'w') as fp:
+            fp.write(output)
+
+    # Write out the standard rules; this is a separate case because we list each log type below the detection
+    output = """## Panther Standard Detections
+
+### Supported Log Types are listed below each detection\n\n"""
+    standard_rules = sorted(standard_rules, key=lambda x: x['DisplayName'].lower())
+    for detection in standard_rules:
+        output += f"[{detection['DisplayName']}](../{detection['YAMLPath']})  \n"
+        if detection['Description']:
+            output += f"{detection['Description']}\n"
+
+        for heading in detection['Headings']:
+            output += f"  - {heading}\n"
+        output += "\n\n"
+    with open(root_dir / 'indexes' / 'standard.md', 'w') as fp:
+        fp.write(output)
+
+if __name__ == '__main__':
+    # Assume that this script is in the .scripts directory and we want to run on the root of the repo
+    root_dir = pathlib.Path(__file__).parent.parent.resolve()
+    generate_indexes(root_dir)
diff --git a/.scripts/requirements.txt b/.scripts/requirements.txt
new file mode 100644
index 000000000..dbfc7099c
--- /dev/null
+++ b/.scripts/requirements.txt
@@ -0,0 +1 @@
+PyYAML
\ No newline at end of file
diff --git a/indexes/README.md b/indexes/README.md
new file mode 100644
index 000000000..e403ea72f
--- /dev/null
+++ b/indexes/README.md
@@ -0,0 +1 @@
+Files in this directory are automatically generated from the YAML metadata.
\ No newline at end of file

From bc84cfd82773be099073fc2c6472bc7e61882d76 Mon Sep 17 00:00:00 2001
From: mbellifa
Date: Thu, 9 Jan 2025 17:30:36 -0700
Subject: [PATCH 02/10] Correct write permission YAML

---
 .github/workflows/release.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 7830e31f6..8a940dd19 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -4,7 +4,7 @@ on:
   workflow_dispatch:
 
 permissions:
-  contents: read, write
+  contents: write
 
 jobs:
   release:

From 026357fed87f56346d15b1dd8315ebb87b733f90 Mon Sep 17 00:00:00 2001
From: mbellifa
Date: Thu, 9 Jan 2025 17:45:03 -0700
Subject: [PATCH 03/10] Fix requirements.txt typo

---
 .github/workflows/release.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 8a940dd19..5d8ad4709 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -33,7 +33,7 @@ jobs:
         with:
           python-version: "3.11"
       - name: Install dependencies
-        run: pip3 install -r ./scripts/.requirements.txt
+        run: pip3 install -r ./.scripts/requirements.txt
       - name: Generate indexes
         run: python3 ./scripts/generate_indexes.py
       - name: Commit Indexes

From 8be6c60e99925fb655cf62f43780e44253cfb2d1 Mon Sep 17 00:00:00 2001
From: mbellifa
Date: Thu, 9 Jan 2025 17:48:11 -0700
Subject: [PATCH 04/10] Fixing yet another .scripts typo

---
 .github/workflows/release.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 5d8ad4709..d676e07f8 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -35,7 +35,7 @@ jobs:
       - name: Install dependencies
         run: pip3 install -r ./.scripts/requirements.txt
       - name: Generate indexes
-        run: python3 ./scripts/generate_indexes.py
+        run: python3 ./.scripts/generate_indexes.py
       - name: Commit Indexes
         run: |
           git add ./indexes

From 21d761c25fa534c1e562e84ae041d8cfe6cffbe3 Mon Sep 17 00:00:00 2001
From: mbellifa
Date: Thu, 9 Jan 2025 17:52:07 -0700
Subject: [PATCH 05/10] Remove debug statements

---
 .scripts/generate_indexes.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/.scripts/generate_indexes.py b/.scripts/generate_indexes.py
index 0b82c74b5..998605534 100644
--- a/.scripts/generate_indexes.py
+++ b/.scripts/generate_indexes.py
@@ -227,14 +227,10 @@ def save_website_json(detections, query_lookup, logtype_lookup, json_path):
         # Clean up analysis type, e.g. rule -> Rule and scheduled_rule -> Scheduled Rule
         json_slice['AnalysisType'] = ' '.join([x.capitalize() for x in json_slice['AnalysisType'].split('_')])
         if 'LogTypes' not in d or len(d['LogTypes']) == 0:
-            print("Extracting")
-            print(json_slice)
             json_slice['LogTypes'] = extract_log_types_from_yaml(d, query_lookup, logtype_lookup)
-            print(json_slice)
 
         detection_types[json_slice['AnalysisType']] += 1
         if 'LogTypes' in json_slice:
-            print(json_slice)
             json_slice['LogTypes'].sort()
         json_export.append(json_slice)
     name_map = {}

From 8920027295603f7b49be815066d325727ec9b1c9 Mon Sep 17 00:00:00 2001
From: mbellifa
Date: Thu, 9 Jan 2025 17:56:09 -0700
Subject: [PATCH 06/10] Add name and email config

---
 .github/workflows/release.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index d676e07f8..c2df53221 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -38,6 +38,8 @@ jobs:
         run: python3 ./.scripts/generate_indexes.py
       - name: Commit Indexes
         run: |
+          git config user.email "noreply@panther.com"
+          git config user.name "Panther Bot"
           git add ./indexes
           git commit -m "Update indexes"
           git push

From d5a3c27edf0ade960edb41fbf8da9e2af8d302e6 Mon Sep 17 00:00:00 2001
From: mbellifa
Date: Thu, 9 Jan 2025 17:59:29 -0700
Subject: [PATCH 07/10] Apply write permission to the job itself

---
 .github/workflows/release.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index c2df53221..bbec6eb44 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -11,7 +11,7 @@ jobs:
     runs-on: ubuntu-latest
     permissions:
       id-token: write
-      contents: read
+      contents: write
    env:
      GITHUB_TOKEN: ${{ secrets.PANTHER_BOT_AUTOMATION_TOKEN }}
    steps:

From 6a15f703e1c79196624adb1b7e572b3db56a3a77 Mon Sep 17 00:00:00 2001
From: mbellifa
Date: Mon, 13 Jan 2025 12:20:13 -0700
Subject: [PATCH 08/10] Added no-sign switch for git push

---
 .github/workflows/release.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index bbec6eb44..496b8158e 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -42,7 +42,7 @@ jobs:
           git config user.name "Panther Bot"
           git add ./indexes
           git commit -m "Update indexes"
-          git push
+          git push --no-sign
       - name: Create new panther-analysis release
         run: |
           export AWS_REGION=${{ secrets.AWS_REGION }}

From 6735e9a21abd31b965220a5a1ba62760c2a7af40 Mon Sep 17 00:00:00 2001
From: mbellifa
Date: Fri, 17 Jan 2025 13:26:32 -0700
Subject: [PATCH 09/10] Adds descriptions to markdown indexes

---
 .scripts/generate_indexes.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/.scripts/generate_indexes.py b/.scripts/generate_indexes.py
index 998605534..db12e7784 100644
--- a/.scripts/generate_indexes.py
+++ b/.scripts/generate_indexes.py
@@ -299,7 +299,6 @@ def write_alpha_index(detections, query_lookup, logtype_lookup, root_dir):
     output = "# Alpha Index\n\n"
     letter_buckets = group_by(sorted(logtype_mapping.keys()), key=lambda x: x[0].upper())
     letters = sorted(letter_buckets.keys())
-    #log_type.replace('.', '').replace(' ', '-').lower()
     for letter in letters:
         output += f"- [{letter}](#{letter})\n"
 
@@ -313,6 +312,8 @@ def write_alpha_index(detections, query_lookup, logtype_lookup, root_dir):
             logtype_mapping[log_type] = sorted(logtype_mapping[log_type], key=lambda x: x['DisplayName'].lower())
             for detection in logtype_mapping[log_type]:
                 output += f"- [{detection['DisplayName']}](../{detection['YAMLPath']})\n"
+                if len(detection['Description']) > 3:
+                    output += f"  - {detection['Description']}\n"
         output += "\n\n"
 
     with open(root_dir / 'indexes' / 'alpha-index.md', 'w') as fp:
         fp.write(output)
@@ -341,6 +342,8 @@ def write_alpha_index(detections, query_lookup, logtype_lookup, root_dir):
             logtype_mapping[log_type] = sorted(logtype_mapping[log_type], key=lambda x: x['DisplayName'].lower())
             for detection in logtype_mapping[log_type]:
                 output += f"- [{detection['DisplayName']}](../{detection['YAMLPath']})\n"
+                if len(detection['Description']) > 3:
+                    output += f"  - {detection['Description']}\n"
             output += "\n\n"
         with open(root_dir / 'indexes' / f'{index_file}.md', 'w') as fp:
             fp.write(output)

From 6d6199a4e5bf390751cae321bdab4ea4c13a2c34 Mon Sep 17 00:00:00 2001
From: mbellifa
Date: Mon, 27 Jan 2025 10:47:21 -0700
Subject: [PATCH 10/10] * Split index generation into its own workflow away
 from the release cutting flow * Updated Panther Bot username/email * Sorted
 headings for standard rules so files remain stable if there are no changes

---
 .github/workflows/generate-indexes.yml | 43 ++++++++++++++++++++++++++
 .github/workflows/release.yml          | 15 ++-------
 .scripts/generate_indexes.py           |  7 +++--
 3 files changed, 50 insertions(+), 15 deletions(-)
 create mode 100644 .github/workflows/generate-indexes.yml

diff --git a/.github/workflows/generate-indexes.yml b/.github/workflows/generate-indexes.yml
new file mode 100644
index 000000000..dc7579dca
--- /dev/null
+++ b/.github/workflows/generate-indexes.yml
@@ -0,0 +1,43 @@
+name: Generate Indexes
+
+on:
+  # Since this workflow runs on push to develop and also pushes to develop, we need to ensure that it does not loop.
+  # This is done by ignoring changes to the indexes directory. The script below must never write outside of this directory.
+  push:
+    branches:
+      - develop
+    paths-ignore:
+      - 'indexes/**'
+
+permissions:
+  contents: write
+
+jobs:
+  generate-indexes:
+    runs-on: ubuntu-latest
+    permissions:
+      id-token: write
+      contents: write
+    env:
+      GITHUB_TOKEN: ${{ secrets.PANTHER_BOT_AUTOMATION_TOKEN }}
+    steps:
+      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 #v4.2.2
+        with:
+          fetch-depth: 0
+          token: ${{ env.GITHUB_TOKEN || github.token }}
+      - name: Install Python
+        uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b #v5.3.0
+        with:
+          python-version: "3.11"
+      - name: Install dependencies
+        run: pip3 install -r ./.scripts/requirements.txt
+      - name: Generate indexes
+        run: python3 ./.scripts/generate_indexes.py
+      - name: Commit Indexes
+        continue-on-error: true # This is to ensure that the workflow does not fail if there are no changes to commit
+        run: |
+          git config --global user.email "github-service-account-automation@panther.io"
+          git config --global user.name "panther-bot-automation"
+          git add ./indexes
+          git commit -m "Update indexes"
+          git push --no-sign
\ No newline at end of file
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 1bd155041..e3f5fa2dd 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -4,14 +4,14 @@ on:
   workflow_dispatch:
 
 permissions:
-  contents: write
+  contents: read
 
 jobs:
   release:
     runs-on: ubuntu-latest
     permissions:
       id-token: write
-      contents: write
+      contents: read
     env:
       GITHUB_TOKEN: ${{ secrets.PANTHER_BOT_AUTOMATION_TOKEN }}
     steps:
@@ -32,17 +32,6 @@ jobs:
         uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b #v5.3.0
         with:
           python-version: "3.11"
-      - name: Install dependencies
-        run: pip3 install -r ./.scripts/requirements.txt
-      - name: Generate indexes
-        run: python3 ./.scripts/generate_indexes.py
-      - name: Commit Indexes
-        run: |
-          git config user.email "noreply@panther.com"
-          git config user.name "Panther Bot"
-          git add ./indexes
-          git commit -m "Update indexes"
-          git push --no-sign
       - name: Create new panther-analysis release
         run: |
           export AWS_REGION=${{ secrets.AWS_REGION }}
diff --git a/.scripts/generate_indexes.py b/.scripts/generate_indexes.py
index db12e7784..72c99e9ff 100644
--- a/.scripts/generate_indexes.py
+++ b/.scripts/generate_indexes.py
@@ -1,6 +1,9 @@
 """
 Analyzes all YAML files in the panther-analysis directory and generates a
-detections-coverage.json file with markdown indexes in the indexes directory
+detections-coverage.json file with markdown indexes in the indexes directory.
+
+IMPORTANT: It's assumed that this script will never write outside of the indexes directory.
+Breaking this assumption could cause an infinite GitHub Actions loop.
 """
 
 import collections, json, hashlib, pathlib, yaml, os, itertools
@@ -292,7 +295,7 @@ def write_alpha_index(detections, query_lookup, logtype_lookup, root_dir):
             logtype_mapping[log_type] = []
         logtype_mapping[log_type].append(winner)
         if 'standard_rules' in winner['YAMLPath']:
-            winner['Headings'] = headings
+            winner['Headings'] = list(sorted(headings))
             standard_rules.append(winner)
         json_export.append(name_map[name][0])