Skip to content

Commit

Permalink
Update Atlas probe crawler to fetch all probes
Browse files Browse the repository at this point in the history
The planned measurement crawler requires more than just the connected
probes in the graph. Long-running measurements in particular can
contain disconnected probes, which we might still want to model.

Note that this crawler can now create dangling nodes; e.g., probes with
the status "Never Connected" have no IP/ASN/country. But rather than
arbitrarily deciding what to include and what not, we just fetch all
(public) probes, since the number is not very large.

This commit also updates the IPv6 handling to guarantee a canonical
form.
  • Loading branch information
m-appel committed Dec 20, 2023
1 parent ed0180b commit 0773bd5
Showing 1 changed file with 15 additions and 9 deletions.
24 changes: 15 additions & 9 deletions iyp/crawlers/ripe/atlas_probes.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import argparse
import ipaddress
import json
import logging
import os
Expand Down Expand Up @@ -64,7 +65,6 @@ def __add_if_not_none(v, s: set):

def run(self):
params = {'format': 'json',
'status': 1, # Connected
'is_public': True,
'page_size': 500}
r = self.session.get(URL, params=params)
Expand All @@ -73,7 +73,7 @@ def run(self):
next_url, next_data = self.__execute_query(next_url)
data += next_data
logging.info(f'Added {len(next_data)} probes. Total: {len(data)}')
print(f'Fetched {len(data)} connected probes.', file=sys.stderr)
print(f'Fetched {len(data)} probes.', file=sys.stderr)

# Compute nodes
probe_ids = set()
Expand All @@ -92,14 +92,15 @@ def run(self):
logging.warning(f'Duplicate probe ID: {probe_id}. Probably caused by changing probe connectivity while '
'fetching.')
continue

ipv4 = probe['address_v4']
asv4 = probe['asn_v4']
# Ensure proper IP formatting.
ipv6 = probe['address_v6']
if ipv6:
ipv6 = ipaddress.ip_address(ipv6).compressed
probe['address_v6'] = ipv6
asv4 = probe['asn_v4']
asv6 = probe['asn_v6']
# A probe should have at least IP/AS information for IPv4 or IPv6.
if not all((ipv4, asv4)) and not all((ipv6, asv6)):
logging.warning(f'No IPv4/v6 information for probe {probe}')
continue

probe_ids.add(probe_id)
valid_probes.append(probe)
Expand All @@ -115,6 +116,10 @@ def run(self):
else:
logging.warning(f'Skipping creation of COUNTRY relationship of probe {probe["id"]} due to non-ISO '
f'country code: {country_code}')
else:
# Our country_code property formatter does not like None objects, so
# remove the property instead.
probe.pop('country_code')

# push nodes
logging.info('Fetching/pushing nodes')
Expand Down Expand Up @@ -154,8 +159,9 @@ def run(self):
as_qid = as_id[asv6]
located_in_links.append({'src_id': probe_qid, 'dst_id': as_qid, 'props': [self.reference, {'af': 6}]})

country_code = probe['country_code']
if country_code and country_code in iso3166.countries_by_alpha2:
if ('country_code' in probe
and (country_code := probe['country_code'])
and country_code in iso3166.countries_by_alpha2):
country_qid = country_id[country_code]
country_links.append({'src_id': probe_qid, 'dst_id': country_qid,
'props': [self.reference]})
Expand Down

0 comments on commit 0773bd5

Please sign in to comment.