# Skip to content
#
# New rules markup
#
# New rules markup #517
#
# Workflow file for this run

# Diff review for markup.
# Runs for every pull request that targets main: builds the markup review
# report from the PR head and from the PR base, publishes the diff between
# the two reports as an artifact, and smoke-tests markup_report.py.
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
name: Diff review for markup
on:
  pull_request:
    branches:
      - main
jobs:
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
  download_repos:
    # Builds the review report (text + HTML) and the markup-only benchmark
    # from the pull request HEAD commit and publishes them as `review_head`.
    runs-on: ubuntu-latest
    steps:
      - name: Checkout CredData
        uses: actions/checkout@v4
        with:
          ref: ${{ github.event.pull_request.head.sha }}
      - name: Markup hashing
        # Fingerprint of snapshot.yaml, every file under meta/ and the
        # top-level *.py scripts; it keys the cache of the finished review.
        run: |
          md5sum snapshot.yaml >head_checksums.md5
          # NUL-delimited pipeline: paths with whitespace are hashed intact
          find meta -type f -print0 | sort -z | xargs -0 -r md5sum >>head_checksums.md5
          find . -maxdepth 1 -type f -name "*.py" -print0 | sort -z | xargs -0 -r md5sum >>head_checksums.md5
          cat head_checksums.md5
          sha256sum head_checksums.md5
      - name: Cache head review
        id: cache-data
        uses: actions/cache@v4
        with:
          path: |
            review_head.txt
            review_head.html
          key: cred-data-${{ hashFiles('head_checksums.md5') }}
      # The steps guarded by `cache-hit != 'true'` run only when the finished
      # review could not be restored from the cache above.
      - name: Cache tmp
        if: steps.cache-data.outputs.cache-hit != 'true'
        id: cache-tmp
        uses: actions/cache@v4
        with:
          path: tmp
          key: cred-data-${{ hashFiles('snapshot.yaml') }}
      - name: install ansi2html
        if: steps.cache-data.outputs.cache-hit != 'true'
        # the colorized-logs package provides the ansi2html command used below;
        # --yes keeps the install non-interactive
        run: sudo apt-get update && sudo apt-get install --yes colorized-logs
      - name: Set up Python 3.10
        if: steps.cache-data.outputs.cache-hit != 'true'
        uses: actions/setup-python@v5
        with:
          python-version: "3.10"
      - name: Produce review report from HEAD
        if: steps.cache-data.outputs.cache-hit != 'true'
        run: |
          python -m pip install --upgrade pip
          python -m pip install --requirement requirements.txt
          # skip extra hints from git
          git config --global init.defaultBranch work
          python download_data.py --data_dir data --jobs "$(nproc)"
          # stdout and stderr both go into the report
          python review_data.py &>review_head.txt
          ansi2html --style 'pre {font-family: monospace; font-size: large}' <review_head.txt >review_head.html
      - name: Produce benchmark scores from empty report to check markup only
        if: steps.cache-data.outputs.cache-hit != 'true'
        run: |
          python -m benchmark --scanner credsweeper --load .ci/empty_report.json >benchmark.txt
          # a non-empty diff against the committed reference fails the step
          diff --unified=3 --ignore-all-space --ignore-blank-lines .ci/benchmark.txt benchmark.txt
      - name: Upload artifact
        # always(): publish whatever was produced, even after a failed step
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: review_head
          path: |
            review_head.txt
            review_head.html
            benchmark.txt
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
  review_base:
    # Builds the review report (text + HTML) from the pull request BASE commit
    # and publishes it as `review_base`.
    # NOTE(review): no artifact of download_repos is consumed here; the
    # dependency presumably lets this job reuse the `tmp` cache saved by it -
    # confirm before removing.
    needs: [download_repos]
    runs-on: ubuntu-latest
    steps:
      - name: Checkout CredData
        uses: actions/checkout@v4
        with:
          ref: ${{ github.event.pull_request.base.sha }}
      - name: Markup hashing
        # Fingerprint of snapshot.yaml, every file under meta/ and the
        # top-level *.py scripts; it keys the cache of the finished review.
        run: |
          md5sum snapshot.yaml >base_checksums.md5
          # NUL-delimited pipeline: paths with whitespace are hashed intact
          find meta -type f -print0 | sort -z | xargs -0 -r md5sum >>base_checksums.md5
          find . -maxdepth 1 -type f -name "*.py" -print0 | sort -z | xargs -0 -r md5sum >>base_checksums.md5
          cat base_checksums.md5
          sha256sum base_checksums.md5
      - name: Cache base review
        id: cache-data
        uses: actions/cache@v4
        with:
          path: |
            review_base.txt
            review_base.html
          key: cred-data-${{ hashFiles('base_checksums.md5') }}
      # The steps guarded by `cache-hit != 'true'` run only when the finished
      # review could not be restored from the cache above.
      - name: Cache tmp
        if: steps.cache-data.outputs.cache-hit != 'true'
        id: cache-tmp
        uses: actions/cache@v4
        with:
          path: tmp
          key: cred-data-${{ hashFiles('snapshot.yaml') }}
      - name: install ansi2html
        if: steps.cache-data.outputs.cache-hit != 'true'
        # the colorized-logs package provides the ansi2html command used below;
        # --yes keeps the install non-interactive
        run: sudo apt-get update && sudo apt-get install --yes colorized-logs
      - name: Set up Python 3.10
        if: steps.cache-data.outputs.cache-hit != 'true'
        uses: actions/setup-python@v5
        with:
          python-version: "3.10"
      - name: Produce review report from BASE
        if: steps.cache-data.outputs.cache-hit != 'true'
        run: |
          python -m pip install --upgrade pip
          python -m pip install --requirement requirements.txt
          # skip extra hints from git (same setting as the HEAD job)
          git config --global init.defaultBranch work
          python download_data.py --data_dir data --jobs "$(nproc)"
          # stdout and stderr both go into the report
          python review_data.py &>review_base.txt
          ansi2html --style 'pre {font-family: monospace; font-size: large}' <review_base.txt >review_base.html
      - name: Upload artifact
        # always(): publish whatever was produced, even after a failed step
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: review_base
          path: |
            review_base.txt
            review_base.html
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
  diff_review:
    # Diffs the base review report against the head review report and
    # publishes the result (text + HTML) as `review_diff`.
    needs: [download_repos, review_base]
    runs-on: ubuntu-latest
    steps:
      - name: install ansi2html
        # the colorized-logs package provides the ansi2html command used below;
        # --yes keeps the install non-interactive
        run: sudo apt-get update && sudo apt-get install --yes colorized-logs
      - name: Download all workflow run artifacts
        # no `name` input: each artifact lands in a directory named after it
        uses: actions/download-artifact@v4
      - name: Get diff for review
        run: |
          # diff exits 0 when the reports match, 1 when they differ and >1 on
          # trouble (e.g. a missing report); only 0 and 1 are expected here.
          # NOTE(review): --color=auto adds no colour while output goes to a file.
          status=0
          diff --unified=1 --color=auto review_base/review_base.txt review_head/review_head.txt &>review_diff.txt || status=$?
          case "$status" in
            0)
              touch review_diff.html
              ;;
            1)
              ansi2html --style 'pre {font-family: monospace; font-size: large}' <review_diff.txt >review_diff.html
              ;;
            *)
              # surface the diff error instead of publishing it as a review
              cat review_diff.txt >&2
              exit "$status"
              ;;
          esac
      - name: Upload artifact
        # always(): publish whatever was produced, even after a failed step
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: review_diff
          path: |
            review_diff.txt
            review_diff.html
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
  markup_report_test:
    # Smoke test for markup_report.py: feeds it a one-finding report and checks
    # that the file is rewritten and carries the expected markup substring.
    runs-on: ubuntu-latest
    steps:
      - name: Checkout CredData
        uses: actions/checkout@v4
        with:
          ref: ${{ github.event.pull_request.head.sha }}
      - name: Set up Python 3.10
        uses: actions/setup-python@v5
        with:
          python-version: "3.10"
      - name: Report markup test
        run: |
          # quoted heredoc: the JSON fixture is written verbatim, no expansion
          cat >report.json <<'EOF'
          [
            {
              "api_validation": "NOT_AVAILABLE",
              "ml_validation": "VALIDATED_KEY",
              "ml_probability": 0.5,
              "rule": "URL Credentials",
              "severity": "high",
              "confidence": "moderate",
              "line_data_list": [
                {
                  "line": "1b17231a97529365248357043f038afb58be4699fc955cee78d5c8a2b037e89d",
                  "line_num": 4256,
                  "path": "data/00408ef6/src/eee97fd2.c",
                  "info": "",
                  "value": "5489cf96c232e0925f96692a65e3e7afe8e09430c280e71b808da17dee9791f8",
                  "value_start": 16,
                  "value_end": 24,
                  "variable": "5b21d8adfc17cb2d231c2ec96498c8076c4a1d1e5373fa7d8b15a9e63d64d8f2",
                  "variable_start": 5,
                  "variable_end": 11,
                  "entropy_validation": {
                    "iterator": "BASE64_CHARS",
                    "entropy": 3.0,
                    "valid": false
                  }
                }
              ]
            }
          ]
          EOF
          # keep a pristine copy to prove the script rewrote the report
          cp report.json report.bak
          python markup_report.py report.json
          if diff report.bak report.json; then
            echo "Something went wrong. The files must be different"
            exit 1
          fi
          # the substring from markup must appear in "api_validation";
          # grep exits non-zero (failing the step) when it is absent
          grep 'eee97fd2,GitHub,00408ef6' report.json
      - name: Upload artifact
        # always(): publish the reports even when the test step failed
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: reports
          path: |
            report.json
            report.bak
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #