From 6d246181f1018e7e2fd21c75d0f2b45842c2716e Mon Sep 17 00:00:00 2001 From: Rishi Mondal <146999057+MAVRICK-1@users.noreply.github.com> Date: Tue, 30 Jan 2024 11:43:57 +0530 Subject: [PATCH 1/3] decompressed roas.csv.xz and changed the urls --- iyp/crawlers/ripe/roa.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/iyp/crawlers/ripe/roa.py b/iyp/crawlers/ripe/roa.py index 55459e49..b1afadaa 100644 --- a/iyp/crawlers/ripe/roa.py +++ b/iyp/crawlers/ripe/roa.py @@ -4,7 +4,8 @@ import sys from collections import defaultdict from datetime import datetime, timedelta - +import lzma +from io import BytesIO import requests from iyp import BaseCrawler, RequestStatusError @@ -25,7 +26,7 @@ def __init__(self, organization, url, name): self.date_path = f'{now.year}/{now.month:02d}/{now.day:02d}' # Check if today's data is available - self.url = f'{URL}/afrinic.tal/{self.date_path}/roas.csv' + self.url = f'{URL}/afrinic.tal/{self.date_path}/roas.csv.xz' req = requests.head(self.url) if req.status_code != 200: now -= timedelta(days=1) @@ -37,20 +38,21 @@ def __init__(self, organization, url, name): def run(self): """Fetch data from RIPE and push to IYP.""" - for tal in TALS: - - self.url = f'{URL}/{tal}/{self.date_path}/roas.csv' + self.url = f'{URL}/{tal}/{self.date_path}/roas.csv.xz' logging.info(f'Fetching ROA file: {self.url}') req = requests.get(self.url) if req.status_code != 200: raise RequestStatusError('Error while fetching data for ' + self.url) + # Decompress the .xz file and read it as CSV + with lzma.open(BytesIO(req.content)) as xz_file: + csv_content = xz_file.read().decode('utf-8').splitlines() + # Aggregate data per prefix asns = set() - prefix_info = defaultdict(list) - for line in req.text.splitlines(): + for line in csv_content: url, asn, prefix, max_length, start, end = line.split(',') # Skip header From ed3fef24e8313f0590752bdafd6f8d132fe33ac8 Mon Sep 17 00:00:00 2001 From: Rishi Mondal 
<146999057+MAVRICK-1@users.noreply.github.com> Date: Tue, 30 Jan 2024 16:42:44 +0530 Subject: [PATCH 2/3] pre-commit pre-commit --- iyp/crawlers/ripe/roa.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/iyp/crawlers/ripe/roa.py b/iyp/crawlers/ripe/roa.py index b1afadaa..d1454143 100644 --- a/iyp/crawlers/ripe/roa.py +++ b/iyp/crawlers/ripe/roa.py @@ -4,9 +4,9 @@ import sys from collections import defaultdict from datetime import datetime, timedelta +import requests import lzma from io import BytesIO -import requests from iyp import BaseCrawler, RequestStatusError From 95cd7d2990f187c8eeb23193994dea6a9adafbfb Mon Sep 17 00:00:00 2001 From: Rishi Mondal <146999057+MAVRICK-1@users.noreply.github.com> Date: Tue, 30 Jan 2024 18:02:11 +0530 Subject: [PATCH 3/3] updated the code structure, passed all tests --- iyp/crawlers/ripe/roa.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/iyp/crawlers/ripe/roa.py b/iyp/crawlers/ripe/roa.py index d1454143..b4a62af4 100644 --- a/iyp/crawlers/ripe/roa.py +++ b/iyp/crawlers/ripe/roa.py @@ -1,13 +1,14 @@ import argparse import logging +import lzma import os import sys from collections import defaultdict from datetime import datetime, timedelta -import requests -import lzma from io import BytesIO +import requests + from iyp import BaseCrawler, RequestStatusError # URL to RIPE repository