# Build Test Data
#
# Workflow file for run #499 of "Build Test Data".

name: Build Test Data

# Run on pushes and pull requests targeting main, plus once a day on a
# schedule.
on:
  push:
    branches: [main]
  pull_request:
    branches: [main]
  schedule:
    # Fields are: minute hour day-of-month month day-of-week (UTC).
    - cron: "24 0 * * *"  # Daily at 00:24 UTC
# Workflow-level environment, exported to every step of every job.
env:
  # Quoted because environment values are always strings at runtime.
  # NOTE(review): not referenced by any step visible in this workflow;
  # presumably read by the parsing scripts - confirm.
  TEST_IPV6: "false"
jobs:
  # Downloads the upstream IP/geofeed sources, parses them into
  # ./test-data/IPv4.json and ./test-data/IPv6.json, writes a summary to
  # results.md and uploads all three as the "test-data" artifact.
  build-data:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      # Export the build date (NOW) and timestamp (RELEASE_DATE) to later
      # steps via GITHUB_ENV, and create the working directories.
      - name: Prepare Workspace
        run: |
          echo "NOW=$(date '+%D')" >> "${GITHUB_ENV}"
          echo "RELEASE_DATE=$(date '+%D %T')" >> "${GITHUB_ENV}"
          mkdir -p data test-data

      # RELEASE_DATE is read as a shell variable rather than interpolated
      # into the script text with an expression.
      - name: Start the Results File
        run: |
          echo -e "# Built on: ${RELEASE_DATE}" >> results.md

      # The key embeds the build date and the ref name, so downloaded data is
      # reused only by repeat runs on the same day for the same ref.
      - name: Cache Testing Data
        id: cache-test-data
        uses: actions/cache@v4
        with:
          path: ./data
          key: testing-data-${{ env.NOW }}-${{ github.ref_name }}

      # Skipped when the cache step restored ./data.
      - name: Download Test Data
        if: steps.cache-test-data.outputs.cache-hit != 'true'
        working-directory: ./data
        run: |
          # fetch <output-file> <url>
          # --fail turns HTTP 4xx/5xx responses into a non-zero exit so an
          # error page is never saved (and later cached) as feed data;
          # --retry re-attempts transient failures before giving up.
          fetch() {
            curl --fail --silent --show-error --location --retry 3 \
              --output "$1" "$2"
          }

          fetch feed.xml "https://www.pingdom.com/rss/probe_servers.xml"
          fetch hetrix.txt "https://hetrixtools.com/resources/uptime-monitor-ips.txt"
          fetch updown.json "https://updown.io/api/nodes"
          fetch statuscake.json "https://app.statuscake.com/Workfloor/Locations.php?format=json"
          fetch oracle-ranges.json "https://docs.oracle.com/en-us/iaas/tools/public_ip_ranges.json"
          fetch linode-geofeed.csv "https://geoip.linode.com/"
          fetch digitalocean-geofeed.csv "https://digitalocean.com/geo/google.csv"
          fetch vultr-geofeed.csv "https://geofeed.constant.com/"
          fetch starlink-geofeed.csv "https://geoip.starlinkisp.net/feed.csv"
          fetch google-geofeed.csv "https://www.gstatic.com/ipranges/cloud_geofeed"
          fetch aws-geofeed.csv "https://ip-ranges.amazonaws.com/geo-ip-feed.csv"
          fetch ting-geofeed.csv "https://geoip.tingfiber.net/tf-geofeed.csv"
          # Disabled source:
          # fetch geolocatemuch-geofeed.csv "https://geolocatemuch.com/geofeeds/validated-all.csv"

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: "pypy3.10"

      # pip is invoked through the interpreter so the requirements are
      # installed for the same PyPy that ensurepip just bootstrapped.
      - name: Install Python Requirements
        working-directory: ./scripts
        run: |
          pypy -m ensurepip
          pypy -m pip install -r requirements.txt

      # Each parser is called as: <input> <output> [address field].
      # Several invocations target the same output file.
      - name: Parse Data
        run: |
          python ./scripts/parse-pingdom.py ./data/feed.xml ./test-data/IPv4.json ip
          python ./scripts/parse-pingdom.py ./data/feed.xml ./test-data/IPv6.json ipv6
          python ./scripts/parse-hetrix.py ./data/hetrix.txt ./test-data/IPv4.json
          python ./scripts/parse-updown.py ./data/updown.json ./test-data/IPv4.json ip
          python ./scripts/parse-updown.py ./data/updown.json ./test-data/IPv6.json ipv6
          python ./scripts/parse-statuscake.py ./data/statuscake.json ./test-data/IPv4.json ip
          python ./scripts/parse-statuscake.py ./data/statuscake.json ./test-data/IPv6.json ipv6
          python ./scripts/parse-oracle.py ./data/oracle-ranges.json ./test-data/IPv4.json

      # Every downloaded *-geofeed.csv is parsed twice, once per address
      # family. File names are passed NUL-delimited and quoted so unusual
      # names cannot be split or globbed, and sorted so the processing order
      # is the same on every run.
      - name: Parse Data (Geofeeds)
        run: |
          find ./data -type f -name "*-geofeed.csv" -print0 | sort -z |
            while IFS= read -r -d '' file; do
              python ./scripts/parse-geofeed.py "$file" ./test-data/IPv4.json ip
              python ./scripts/parse-geofeed.py "$file" ./test-data/IPv6.json ipv6
            done

      # The output of process.py is appended to results.md under a heading
      # per address family.
      - name: Deduplicate and Process
        run: |
          echo -e "\n## IPv4 Processing Result\n" >> results.md
          python ./scripts/process.py ./test-data/IPv4.json >> results.md
          echo -e "\n## IPv6 Processing Result\n" >> results.md
          python ./scripts/process.py ./test-data/IPv6.json >> results.md

      - name: Upload Test Data
        uses: actions/upload-artifact@v4
        with:
          name: test-data
          path: |
            ./test-data/IPv6.json
            ./test-data/IPv4.json
            ./results.md
          # Fail the step instead of only warning when nothing matched.
          if-no-files-found: error