-
Notifications
You must be signed in to change notification settings - Fork 23
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add RoVista crawler which categorizes ASes into validating and non-validating ROV. Closes #83. --------- Co-authored-by: Malte Tashiro <[email protected]>
- Loading branch information
Showing
3 changed files
with
119 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
# RoVista -- https://rovista.netsecurelab.org/ | ||
|
||
> RoVista aims to determine the Routing Origin Validation (ROV) status of network | ||
> operators. | ||
> | ||
> RoV Scores are determined based on the number of RPKI-invalid prefixes reachable by an | ||
> Autonomous System (AS). Consequently, a higher ROV score suggests that the AS can | ||
> effectively filter more RPKI-invalid prefixes. However, it is important to note that | ||
> the RoV score does not conclusively indicate whether an AS has actually implemented | ||
> ROV or not, partly due to limitations in [the] framework and other contributing | ||
> factors. | ||
IYP converts these scores (or ratios) to two Tags: | ||
|
||
- ASes with a ratio greater than 0.5 are categorized as `Validating RPKI ROV` | ||
- ASes with a ratio less than or equal to 0.5 are categorized as `Not Validating RPKI ROV` | ||
|
||
## Graph representation | ||
|
||
```cypher | ||
(:AS {asn: 2497})-[:CATEGORIZED {ratio: 1.0}]->(:Tag {label: 'Validating RPKI ROV'}) | ||
(:AS {asn: 6762})-[:CATEGORIZED {ratio: 0}]->(:Tag {label: 'Not Validating RPKI ROV'}) | ||
``` | ||
|
||
## Dependence | ||
|
||
This crawler does not depend on other crawlers. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
import argparse | ||
import logging | ||
import os | ||
import sys | ||
|
||
import requests | ||
|
||
from iyp import BaseCrawler, RequestStatusError | ||
|
||
# RoVista API endpoint requested by Crawler.run() and passed to the Crawler constructor.
URL = 'https://api.rovista.netsecurelab.org/rovista/api/overview'
# Organization name passed to the Crawler constructor.
ORG = 'RoVista'
# Crawler name passed to the Crawler constructor.
NAME = 'rovista.validating_rov'
|
||
|
||
class Crawler(BaseCrawler):
    """Categorize ASes as validating or not validating RPKI ROV.

    Every entry returned by the RoVista API is linked to one of two Tag
    nodes, depending on whether its ratio is greater than 0.5.
    """

    def run(self):
        """Fetch all RoVista entries and push the resulting links to IYP.

        Raises:
            RequestStatusError: If the API answers with a status code
                other than 200.
        """
        page_size = 1000  # Adjust batch size as needed
        page = 0
        records = []
        seen_asns = set()

        more_pages = True
        while more_pages:
            # NOTE(review): 'offset' advances by one per request, not by
            # page_size -- presumably the API treats it as a page index;
            # confirm against the API documentation.
            reply = requests.get(URL, params={'offset': page, 'count': page_size})
            if reply.status_code != 200:
                raise RequestStatusError('Error while fetching RoVista data')

            rows = reply.json().get('data', [])
            for row in rows:
                asn = row['asn']
                ratio = row['ratio']
                seen_asns.add(asn)
                label = 'Validating RPKI ROV' if ratio > 0.5 else 'Not Validating RPKI ROV'
                records.append({'asn': asn, 'ratio': ratio, 'label': label})

            page += 1
            # A page shorter than requested is the last one.
            more_pages = len(rows) >= page_size

        logging.info('Pushing nodes to neo4j...')
        # Look up the node IDs of all seen ASes and of the two tags.
        self.asn_id = self.iyp.batch_get_nodes_by_single_prop('AS', 'asn', seen_asns)
        not_validating_id = self.iyp.get_node('Tag', {'label': 'Not Validating RPKI ROV'}, create=True)
        validating_id = self.iyp.get_node('Tag', {'label': 'Validating RPKI ROV'}, create=True)
        tag_id_by_label = {
            'Validating RPKI ROV': validating_id,
            'Not Validating RPKI ROV': not_validating_id,
        }

        # One CATEGORIZED link per entry, carrying the ratio as a property.
        links = [
            {
                'src_id': self.asn_id[record['asn']],
                'dst_id': tag_id_by_label[record['label']],
                'props': [self.reference, {'ratio': record['ratio']}],
            }
            for record in records
        ]

        logging.info('Pushing links to neo4j...')
        self.iyp.batch_add_links('CATEGORIZED', links)
|
||
|
||
def main() -> None:
    """Parse the command line, set up file logging, and run the crawler.

    With ``--unit-test`` the crawler's unit test is executed instead of a
    regular run.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument('--unit-test', action='store_true')
    options = cli.parse_args()

    # The log file is named after the script, minus its last three
    # characters (the '.py' suffix when invoked as a script file).
    script_file = os.path.basename(sys.argv[0]).replace('/', '_')
    log_path = 'log/' + script_file[0:-3] + '.log'
    logging.basicConfig(
        format='%(asctime)s %(levelname)s %(message)s',
        filename=log_path,
        level=logging.INFO,
        datefmt='%Y-%m-%d %H:%M:%S'
    )

    logging.info(f'Started: {sys.argv}')

    rovista_crawler = Crawler(ORG, URL, NAME)
    if not options.unit_test:
        rovista_crawler.run()
        rovista_crawler.close()
    else:
        rovista_crawler.unit_test(logging)
    logging.info(f'Finished: {sys.argv}')
|
||
|
||
# Script entry point: run the crawler, then terminate with exit status 0.
if __name__ == '__main__':
    main()
    raise SystemExit(0)