From 0773bd539acd2dbf17754264a98611f3b6aa5870 Mon Sep 17 00:00:00 2001 From: Malte Tashiro Date: Wed, 20 Dec 2023 05:57:21 +0000 Subject: [PATCH] Update Atlas probe crawler to fetch all probes The planned measurement crawler requires more than just the connected probes to be in the graph. Long-running measurements in particular can contain disconnected probes that we might still want to model. Note that this crawler can now create dangling nodes, e.g., there is a status "Never Connected" where the probes have no IP/ASN/country. However, rather than arbitrarily deciding what to include and what not, we just fetch all (public) probes, since the number is not very large. This commit also updates the IPv6 handling to guarantee a canonical form. --- iyp/crawlers/ripe/atlas_probes.py | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/iyp/crawlers/ripe/atlas_probes.py b/iyp/crawlers/ripe/atlas_probes.py index 907a062..8d8596a 100644 --- a/iyp/crawlers/ripe/atlas_probes.py +++ b/iyp/crawlers/ripe/atlas_probes.py @@ -1,4 +1,5 @@ import argparse +import ipaddress import json import logging import os @@ -64,7 +65,6 @@ def __add_if_not_none(v, s: set): def run(self): params = {'format': 'json', - 'status': 1, # Connected 'is_public': True, 'page_size': 500} r = self.session.get(URL, params=params) @@ -73,7 +73,7 @@ def run(self): next_url, next_data = self.__execute_query(next_url) data += next_data logging.info(f'Added {len(next_data)} probes. Total: {len(data)}') - print(f'Fetched {len(data)} connected probes.', file=sys.stderr) + print(f'Fetched {len(data)} probes.', file=sys.stderr) # Compute nodes probe_ids = set() @@ -92,14 +92,15 @@ def run(self): logging.warning(f'Duplicate probe ID: {probe_id}. Probably caused by changing probe connectivity while ' 'fetching.') continue + ipv4 = probe['address_v4'] - asv4 = probe['asn_v4'] + # Ensure proper IP formatting. 
ipv6 = probe['address_v6'] + if ipv6: + ipv6 = ipaddress.ip_address(ipv6).compressed + probe['address_v6'] = ipv6 + asv4 = probe['asn_v4'] asv6 = probe['asn_v6'] - # A probe should have at least IP/AS information for IPv4 or IPv6. - if not all((ipv4, asv4)) and not all((ipv6, asv6)): - logging.warning(f'No IPv4/v6 information for probe {probe}') - continue probe_ids.add(probe_id) valid_probes.append(probe) @@ -115,6 +116,10 @@ def run(self): else: logging.warning(f'Skipping creation of COUNTRY relationship of probe {probe["id"]} due to non-ISO ' f'country code: {country_code}') + else: + # Our country_code property formatter does not like None objects, so + # remove the property instead. + probe.pop('country_code') # push nodes logging.info('Fetching/pushing nodes') @@ -154,8 +159,9 @@ def run(self): as_qid = as_id[asv6] located_in_links.append({'src_id': probe_qid, 'dst_id': as_qid, 'props': [self.reference, {'af': 6}]}) - country_code = probe['country_code'] - if country_code and country_code in iso3166.countries_by_alpha2: + if ('country_code' in probe + and (country_code := probe['country_code']) + and country_code in iso3166.countries_by_alpha2): country_qid = country_id[country_code] country_links.append({'src_id': probe_qid, 'dst_id': country_qid, 'props': [self.reference]})