From 05e927655a7d5aa20b10d283e660557c0862b72c Mon Sep 17 00:00:00 2001
From: Rishi Mondal <146999057+MAVRICK-1@users.noreply.github.com>
Date: Fri, 9 Feb 2024 07:02:55 +0530
Subject: [PATCH] Add RoVista crawler (#124)

Add the RoVista crawler, which categorizes ASes into validating and
non-validating ROV.

Closes #83.

---------

Co-authored-by: Malte Tashiro
---
 config.json.example                    |  1 +
 iyp/crawlers/rovista/README.md         | 27 ++++
 iyp/crawlers/rovista/validating_rov.py | 91 ++++++++++++++++++++++++++
 3 files changed, 119 insertions(+)
 create mode 100644 iyp/crawlers/rovista/README.md
 create mode 100644 iyp/crawlers/rovista/validating_rov.py

diff --git a/config.json.example b/config.json.example
index a281435..2599b6a 100644
--- a/config.json.example
+++ b/config.json.example
@@ -57,6 +57,7 @@
         "iyp.crawlers.ihr.rov",
         "iyp.crawlers.bgptools.tags",
         "iyp.crawlers.bgptools.anycast_prefixes",
+        "iyp.crawlers.rovista.validating_rov",
         "iyp.crawlers.stanford.asdb",
         "iyp.crawlers.peeringdb.org",
         "iyp.crawlers.peeringdb.fac",
diff --git a/iyp/crawlers/rovista/README.md b/iyp/crawlers/rovista/README.md
new file mode 100644
index 0000000..c342283
--- /dev/null
+++ b/iyp/crawlers/rovista/README.md
@@ -0,0 +1,27 @@
+# RoVista -- https://rovista.netsecurelab.org/
+
+> RoVista aims to determine the Routing Origin Validation (ROV) status of network
+> operators.
+>
+> RoV Scores are determined based on the number of RPKI-invalid prefixes reachable by an
+> Autonomous System (AS). Consequently, a higher ROV score suggests that the AS can
+> effectively filter more RPKI-invalid prefixes. However, it is important to note that
+> the RoV score does not conclusively indicate whether an AS has actually implemented
+> ROV or not, partly due to limitations in [the] framework and other contributing
+> factors.
+
+IYP converts these scores (or ratios) to two Tags:
+
+- ASes with a ratio greater than 0.5 are categorized as `Validating RPKI ROV`
+- ASes with a ratio less than or equal to 0.5 are categorized as `Not Validating RPKI ROV`
+
+## Graph representation
+
+```cypher
+(:AS {asn: 2497})-[:CATEGORIZED {ratio: 1.0}]->(:Tag {label: 'Validating RPKI ROV'})
+(:AS {asn: 6762})-[:CATEGORIZED {ratio: 0}]->(:Tag {label: 'Not Validating RPKI ROV'})
+```
+
+## Dependence
+
+This crawler does not depend on other crawlers.
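The score-to-tag mapping described in the README above can be exercised directly against the RoVista API. Below is a minimal sketch: the endpoint and the `offset`/`count` parameters appear in the crawler added by this patch, while the small page size and the printed output are illustrative only.

```python
# Minimal sketch of the score-to-tag mapping described in the README.
# The endpoint and the 'offset'/'count' parameters come from the crawler
# in this patch; fetching a single small page here is purely illustrative.
import requests

URL = 'https://api.rovista.netsecurelab.org/rovista/api/overview'

response = requests.get(URL, params={'offset': 0, 'count': 5})
response.raise_for_status()

for entry in response.json().get('data', []):
    # Ratios strictly above 0.5 are tagged as validating, per the README.
    label = 'Validating RPKI ROV' if entry['ratio'] > 0.5 else 'Not Validating RPKI ROV'
    print(entry['asn'], entry['ratio'], label)
```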
diff --git a/iyp/crawlers/rovista/validating_rov.py b/iyp/crawlers/rovista/validating_rov.py
new file mode 100644
index 0000000..f6acedb
--- /dev/null
+++ b/iyp/crawlers/rovista/validating_rov.py
@@ -0,0 +1,91 @@
+import argparse
+import logging
+import os
+import sys
+
+import requests
+
+from iyp import BaseCrawler, RequestStatusError
+
+URL = 'https://api.rovista.netsecurelab.org/rovista/api/overview'
+ORG = 'RoVista'
+NAME = 'rovista.validating_rov'
+
+
+class Crawler(BaseCrawler):
+
+    def run(self):
+        """Get RoVista data from their API."""
+        batch_size = 1000  # Adjust batch size as needed
+        offset = 0
+        entries = []
+        asns = set()
+
+        while True:
+            # Make a request with the current offset
+            response = requests.get(URL, params={'offset': offset, 'count': batch_size})
+            if response.status_code != 200:
+                raise RequestStatusError('Error while fetching RoVista data')
+
+            data = response.json().get('data', [])
+            for entry in data:
+                asns.add(entry['asn'])
+                if entry['ratio'] > 0.5:
+                    entries.append({'asn': entry['asn'], 'ratio': entry['ratio'], 'label': 'Validating RPKI ROV'})
+                else:
+                    entries.append({'asn': entry['asn'], 'ratio': entry['ratio'], 'label': 'Not Validating RPKI ROV'})
+
+            # Move to the next page
+            offset += 1
+            # Break the loop if there's no more data
+            if len(data) < batch_size:
+                break
+        logging.info('Pushing nodes to neo4j...')
+        # Get node IDs for all ASNs and for the two tags
+        self.asn_id = self.iyp.batch_get_nodes_by_single_prop('AS', 'asn', asns)
+        tag_id_not_valid = self.iyp.get_node('Tag', {'label': 'Not Validating RPKI ROV'}, create=True)
+        tag_id_valid = self.iyp.get_node('Tag', {'label': 'Validating RPKI ROV'}, create=True)
+        # Compute links
+        links = []
+        for entry in entries:
+            asn_qid = self.asn_id[entry['asn']]
+            if entry['ratio'] > 0.5:
+                links.append({'src_id': asn_qid, 'dst_id': tag_id_valid,
+                              'props': [self.reference, {'ratio': entry['ratio']}]})
+            else:
+                links.append({'src_id': asn_qid, 'dst_id': tag_id_not_valid,
+                              'props': [self.reference, {'ratio': entry['ratio']}]})
+
+        logging.info('Pushing links to neo4j...')
+        # Push all links to IYP
+        self.iyp.batch_add_links('CATEGORIZED', links)
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--unit-test', action='store_true')
+    args = parser.parse_args()
+
+    scriptname = os.path.basename(sys.argv[0]).replace('/', '_')[0:-3]
+    FORMAT = '%(asctime)s %(levelname)s %(message)s'
+    logging.basicConfig(
+        format=FORMAT,
+        filename='log/' + scriptname + '.log',
+        level=logging.INFO,
+        datefmt='%Y-%m-%d %H:%M:%S'
+    )
+
+    logging.info(f'Started: {sys.argv}')
+
+    crawler = Crawler(ORG, URL, NAME)
+    if args.unit_test:
+        crawler.unit_test(logging)
+    else:
+        crawler.run()
+        crawler.close()
+    logging.info(f'Finished: {sys.argv}')
+
+
+if __name__ == '__main__':
+    main()
+    sys.exit(0)
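After a run, one way to spot-check the crawler's output is to count the `CATEGORIZED` relationships per tag. The sketch below uses the official `neo4j` Python driver and assumes a local Neo4j instance; the bolt URL and credentials are placeholders (not part of this patch), while the labels and the relationship type follow the graph representation in the README.

```python
# Minimal verification sketch, assuming a local Neo4j instance.
# The bolt URL and credentials are placeholders; the Tag labels and the
# CATEGORIZED relationship follow the README's graph representation.
from neo4j import GraphDatabase

QUERY = """
MATCH (:AS)-[:CATEGORIZED]->(t:Tag)
WHERE t.label IN ['Validating RPKI ROV', 'Not Validating RPKI ROV']
RETURN t.label AS label, count(*) AS ases
"""

with GraphDatabase.driver('bolt://localhost:7687', auth=('neo4j', 'password')) as driver:
    with driver.session() as session:
        # Print the number of ASes tagged with each of the two labels.
        for record in session.run(QUERY):
            print(record['label'], record['ases'])
```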