Add RoVista crawler (#124)
Add RoVista crawler, which categorizes ASes as ROV-validating or non-validating.

Closes #83.

---------

Co-authored-by: Malte Tashiro <[email protected]>
MAVRICK-1 and m-appel authored Feb 9, 2024
1 parent e318100 commit 05e9276
Showing 3 changed files with 119 additions and 0 deletions.
1 change: 1 addition & 0 deletions config.json.example
@@ -57,6 +57,7 @@
"iyp.crawlers.ihr.rov",
"iyp.crawlers.bgptools.tags",
"iyp.crawlers.bgptools.anycast_prefixes",
"iyp.crawlers.rovista.validating_rov",
"iyp.crawlers.stanford.asdb",
"iyp.crawlers.peeringdb.org",
"iyp.crawlers.peeringdb.fac",
27 changes: 27 additions & 0 deletions iyp/crawlers/rovista/README.md
@@ -0,0 +1,27 @@
# RoVista -- https://rovista.netsecurelab.org/

> RoVista aims to determine the Routing Origin Validation (ROV) status of network
> operators.
>
> RoV Scores are determined based on the number of RPKI-invalid prefixes reachable by an
> Autonomous System (AS). Consequently, a higher ROV score suggests that the AS can
> effectively filter more RPKI-invalid prefixes. However, it is important to note that
> the RoV score does not conclusively indicate whether an AS has actually implemented
> ROV or not, partly due to limitations in [the] framework and other contributing
> factors.

IYP converts these scores (or ratios) into two Tags:

- ASes with a ratio greater than 0.5 are categorized as `Validating RPKI ROV`
- ASes with a ratio less than or equal to 0.5 are categorized as `Not Validating RPKI ROV` (see the sketch below)
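
A minimal Python sketch of this threshold rule (a hypothetical helper, not part of the crawler code):

```python
# Hypothetical helper mirroring the threshold described above.
def tag_for_ratio(ratio: float) -> str:
    """Map a RoVista ratio to the corresponding IYP tag label."""
    return 'Validating RPKI ROV' if ratio > 0.5 else 'Not Validating RPKI ROV'


assert tag_for_ratio(1.0) == 'Validating RPKI ROV'
assert tag_for_ratio(0.5) == 'Not Validating RPKI ROV'  # the boundary value is not validating
```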

## Graph representation

```cypher
(:AS {asn: 2497})-[:CATEGORIZED {ratio: 1.0}]->(:Tag {label: 'Validating RPKI ROV'})
(:AS {asn: 6762})-[:CATEGORIZED {ratio: 0}]->(:Tag {label: 'Not Validating RPKI ROV'})
```
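
For example, the validating ASes can be listed with a query like the following. This is a sketch using the neo4j Python driver; the connection URI and credentials are assumptions for a local IYP instance, not part of this commit:

```python
from neo4j import GraphDatabase

URI = 'neo4j://localhost:7687'  # assumed local IYP instance
AUTH = ('neo4j', 'password')    # hypothetical credentials

QUERY = """
MATCH (a:AS)-[c:CATEGORIZED]->(:Tag {label: 'Validating RPKI ROV'})
RETURN a.asn AS asn, c.ratio AS ratio
ORDER BY ratio DESC
LIMIT 10
"""

with GraphDatabase.driver(URI, auth=AUTH) as driver:
    with driver.session() as session:
        # Each record carries the ASN and its RoVista ratio.
        for record in session.run(QUERY):
            print(record['asn'], record['ratio'])
```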

## Dependence

This crawler does not depend on other crawlers.
91 changes: 91 additions & 0 deletions iyp/crawlers/rovista/validating_rov.py
@@ -0,0 +1,91 @@
import argparse
import logging
import os
import sys

import requests

from iyp import BaseCrawler, RequestStatusError

URL = 'https://api.rovista.netsecurelab.org/rovista/api/overview'
ORG = 'RoVista'
NAME = 'rovista.validating_rov'


class Crawler(BaseCrawler):

def run(self):
"""Get RoVista data from their API."""
batch_size = 1000 # Adjust batch size as needed
offset = 0
entries = []
asns = set()

while True:
# Make a request with the current offset
response = requests.get(URL, params={'offset': offset, 'count': batch_size})
if response.status_code != 200:
raise RequestStatusError('Error while fetching RoVista data')

data = response.json().get('data', [])
for entry in data:
asns.add(entry['asn'])
if entry['ratio'] > 0.5:
entries.append({'asn': entry['asn'], 'ratio': entry['ratio'], 'label': 'Validating RPKI ROV'})
else:
entries.append({'asn': entry['asn'], 'ratio': entry['ratio'], 'label': 'Not Validating RPKI ROV'})

# Move to the next page
offset += 1
# Break the loop if there's no more data
if len(data) < batch_size:
break
logging.info('Pushing nodes to neo4j...')
        # Get node IDs for all ASNs and the two Tag nodes
self.asn_id = self.iyp.batch_get_nodes_by_single_prop('AS', 'asn', asns)
tag_id_not_valid = self.iyp.get_node('Tag', {'label': 'Not Validating RPKI ROV'}, create=True)
tag_id_valid = self.iyp.get_node('Tag', {'label': 'Validating RPKI ROV'}, create=True)
# Compute links
links = []
        for entry in entries:
            asn_qid = self.asn_id[entry['asn']]
            # Reuse the label computed above instead of re-checking the ratio.
            tag_qid = tag_id_valid if entry['label'] == 'Validating RPKI ROV' else tag_id_not_valid
            links.append({'src_id': asn_qid, 'dst_id': tag_qid,
                          'props': [self.reference, {'ratio': entry['ratio']}]})

logging.info('Pushing links to neo4j...')
# Push all links to IYP
self.iyp.batch_add_links('CATEGORIZED', links)


def main() -> None:
parser = argparse.ArgumentParser()
parser.add_argument('--unit-test', action='store_true')
args = parser.parse_args()

scriptname = os.path.basename(sys.argv[0]).replace('/', '_')[0:-3]
FORMAT = '%(asctime)s %(levelname)s %(message)s'
logging.basicConfig(
format=FORMAT,
filename='log/' + scriptname + '.log',
level=logging.INFO,
datefmt='%Y-%m-%d %H:%M:%S'
)

logging.info(f'Started: {sys.argv}')

crawler = Crawler(ORG, URL, NAME)
if args.unit_test:
crawler.unit_test(logging)
else:
crawler.run()
crawler.close()
logging.info(f'Finished: {sys.argv}')


if __name__ == '__main__':
main()
sys.exit(0)
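
The pagination loop in `run()` assumes the API returns JSON pages of roughly the following shape; the field names (`data`, `asn`, `ratio`) are taken from the code above, while the values are the illustrative ones from the README:

```python
# Hypothetical page as consumed by run(); the crawler only reads the
# 'data' list and the 'asn' and 'ratio' fields of each entry.
example_page = {
    'data': [
        {'asn': 2497, 'ratio': 1.0},  # tagged 'Validating RPKI ROV'
        {'asn': 6762, 'ratio': 0.0},  # tagged 'Not Validating RPKI ROV'
    ]
}

for entry in example_page['data']:
    label = 'Validating RPKI ROV' if entry['ratio'] > 0.5 else 'Not Validating RPKI ROV'
    print(entry['asn'], label)
```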
