diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md
new file mode 100644
index 0000000..20bf7ad
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/bug_report.md
@@ -0,0 +1,41 @@
+# Bug Report
+
+## Description
+
+
+## Steps to Reproduce
+
+1.
+2.
+3.
+
+## Expected Behavior
+
+
+## Actual Behavior
+
+
+## Environment
+- **Python Version:**
+- **Operating System:**
+- **CLI/Library Version:**
+
+## Error Logs
+
+
+## Screenshots
+
+
+## Possible Fix
+
+
+## Related Issue
+
+
+## Checklist
+- [ ] I have searched for similar issues before submitting this bug report
+- [ ] The bug is reproducible with the latest version
+- [ ] I have included all relevant information for a clear understanding
+
+## Additional Notes
+
\ No newline at end of file
diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md
new file mode 100644
index 0000000..e4d4c04
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/feature_request.md
@@ -0,0 +1,27 @@
+# Feature Request
+
+## Description
+
+
+## Use Case
+
+
+## Proposed Solution
+
+
+## Expected Behavior
+
+
+## Additional Context
+
+
+## Checklist
+- [ ] I have searched for existing feature requests that are similar to this one
+- [ ] This feature is not already present in the latest version
+- [ ] I believe this feature would be beneficial to a wider audience
+
+## Related Issue
+
+
+## Additional Notes
+
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
new file mode 100644
index 0000000..c9b1b64
--- /dev/null
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -0,0 +1,25 @@
+# Pull Request
+
+## Summary
+
+
+## Changes
+
+
+## Related Issue
+
+
+## Test Plan
+
+
+## Documentation Updates
+
+
+## Checklist
+- [ ] Code follows project coding standards
+- [ ] Unit tests added and pass successfully
+- [ ] Documentation updated to reflect changes
+- [ ] Changes do not introduce new issues
+
+## Additional Notes
+
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 6d2e9f3..8958dcc 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,5 @@
 appdirs
 py-radix
 portion
+pandas
 
diff --git a/rov/__init__.py b/rov/__init__.py
index d10c58d..6dfe85d 100644
--- a/rov/__init__.py
+++ b/rov/__init__.py
@@ -13,6 +13,8 @@
 import sys
 import csv
 import lzma
+from datetime import datetime
+import pandas as pd
 from io import BytesIO
 import urllib
 
@@ -65,15 +67,20 @@ DEFAULT_RPKI_URLS = [
         'https://rpki.gin.ntt.net/api/export.json'
         ]
 
-RPKI_ARCHIVE_URLS = [
+RPKI_ARCHIVE_URLS = [ 
         'https://ftp.ripe.net/rpki/afrinic.tal/{year:04d}/{month:02d}/{day:02d}/roas.csv.xz',
         'https://ftp.ripe.net/rpki/apnic.tal/{year:04d}/{month:02d}/{day:02d}/roas.csv.xz',
         'https://ftp.ripe.net/rpki/arin.tal/{year:04d}/{month:02d}/{day:02d}/roas.csv.xz',
         'https://ftp.ripe.net/rpki/lacnic.tal/{year:04d}/{month:02d}/{day:02d}/roas.csv.xz',
         'https://ftp.ripe.net/rpki/ripencc.tal/{year:04d}/{month:02d}/{day:02d}/roas.csv.xz',
-
+        'https://ftp.ripe.net/rpki/afrinic.tal/{year:04d}/{month:02d}/{day:02d}/output.json.xz',
+        'https://ftp.ripe.net/rpki/apnic.tal/{year:04d}/{month:02d}/{day:02d}/output.json.xz',
+        'https://ftp.ripe.net/rpki/arin.tal/{year:04d}/{month:02d}/{day:02d}/output.json.xz',
+        'https://ftp.ripe.net/rpki/lacnic.tal/{year:04d}/{month:02d}/{day:02d}/output.json.xz',
+        'https://ftp.ripe.net/rpki/ripencc.tal/{year:04d}/{month:02d}/{day:02d}/output.json.xz',
         ]
-DEFAULT_DELEGATED_URLS = [
+
+DEFAULT_DELEGATED_URLS = [ 
         'https://www.nro.net/wp-content/uploads/delegated-stats/nro-extended-stats'
         ]
 
@@ -86,11 +93,28 @@ def guess_ta_name(url):
 
     return 'unknown'
 
+def download_csv_xz_and_convert_to_json(url, json_file_path, fname):
+
+    # Download and decompress the xz file
+    with request.urlopen(url) as response:
+        with lzma.LZMAFile(BytesIO(response.read())) as decompressed_data:
+            # Read CSV data into a pandas DataFrame
+            df = pd.read_csv(decompressed_data, header=None, names=["URI", "ASN", "IP Prefix", "Max Length", "Not Before", "Not After"], dtype='unicode')
+
+    # Convert DataFrame rows to a list of ROA dictionaries
+    data_list = [{"asn": row['ASN'], "prefix": row["IP Prefix"], "maxLength": None if pd.isnull(row["Max Length"]) else row["Max Length"], "ta": fname.replace('.json', '')} for _, row in df.iterrows()]
+
+    # Wrap the ROAs in the export.json layout; drop the first entry, which is the CSV header row read as data
+    result_dict = {"roas": data_list[1:]}
+
+    # Save JSON to a file
+    with open(json_file_path, "w") as json_file:
+        json.dump(result_dict, json_file, indent=2)
 
 class ROV(object):
 
-    def __init__( self, irr_urls=DEFAULT_IRR_URLS, rpki_urls=DEFAULT_RPKI_URLS,
-            delegated_urls=DEFAULT_DELEGATED_URLS, irr_dir=DEFAULT_IRR_DIR,
+    def __init__( self, irr_urls=DEFAULT_IRR_URLS, rpki_urls=DEFAULT_RPKI_URLS, 
+            delegated_urls=DEFAULT_DELEGATED_URLS, irr_dir=DEFAULT_IRR_DIR, 
             rpki_dir=DEFAULT_RPKI_DIR, delegated_dir=DEFAULT_DELEGATED_DIR ):
         """Initialize ROV object with databases URLs"""
 
@@ -258,14 +282,13 @@ def load_rpki(self):
                             'endTime': row[5],
                             'ta': ta
                             } )
-
             else:
                 sys.stderr.write('Error: Unknown file format for RPKI data!')
-                return 
+                return
 
         for rec in data['roas']:
-            if( isinstance(rec['asn'], str)
+            if( isinstance(rec['asn'], str) 
                     and rec['asn'].startswith('AS') ):
                 asn = int(rec['asn'][2:])
             else:
 
@@ -464,9 +487,30 @@ def download_databases(self, overwrite=True):
                 # 'roas.csv', change it with the tal name
                 if fname == 'roas.csv.xz':
                     fname = guess_ta_name(url)+".csv"
+
+                if fname == 'output.json.xz':
+                    fname = guess_ta_name(url)+".json"
 
                 if os.path.exists(folder+fname) and not overwrite:
                     continue
 
+                flag = 0
+                if "/20" in url:
+                    date_part_start = url.find('/20')  # Find the starting index of the date part
+                    date_part = url[date_part_start + 1:date_part_start + 11]  # Extract the date substring (e.g., '2023/01/06')
+
+                    try:
+                        url_date = datetime.strptime(date_part, '%Y/%m/%d')  # Convert the extracted date to a datetime object
+                        target_date = datetime(2023, 10, 1)  # October 1, 2023
+
+                        if url_date < target_date:
+                            if "json.xz" in url:
+                                continue
+                            flag = 1
+                        else:
+                            pass
+
+                    except ValueError:
+                        return 'Date not found in the URL.'
                 sys.stderr.write(f'Downloading: {url}\n')
 
@@ -477,10 +521,21 @@ def download_databases(self, overwrite=True):
                             with lzma.open(BytesIO(response.read())) as r:
                                 with open(folder+fname, 'wb') as f:
                                     shutil.copyfileobj(r,f)
+                        if flag == 1:
+                            # Write JSON file converted from the archived CSV
+                            fname = fname.replace('csv', 'json')
+                            download_csv_xz_and_convert_to_json(url, folder+fname, fname)
+
+                        # decompress json.xz to json
+                        elif "output.json.xz" in url:
+                            with closing(request.urlopen(url)) as response:
+                                with lzma.open(BytesIO(response.read())) as r:
+                                    json_data = json.loads(r.read().decode('utf-8'))
+                                    with open(folder+fname, 'w') as f:
+                                        json.dump(json_data, f, indent=4)
                     else:
                         with closing(request.urlopen(url)) as r:
                             with open(folder+fname, 'wb') as f:
                                 shutil.copyfileobj(r, f)
                 except urllib.error.URLError:
                     sys.stderr.write(f'Error {url} is not available.\n')
-
diff --git a/setup.py b/setup.py
index e1d15da..dadedae 100644
--- a/setup.py
+++ b/setup.py
@@ -17,7 +17,8 @@
     install_requires=[
         'appdirs',
         'py-radix',
-        'portion'
+        'portion',
+        'pandas'
     ],
     entry_points={'console_scripts': ['rov = rov.__main__:main']},
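For context on the new converter, here is a small illustrative sketch (not part of the patch) of how `download_csv_xz_and_convert_to_json()` could be exercised and what it writes. The archive date, output path, and ROA values in the comment are hypothetical; the keys mirror the export.json layout that `load_rpki()` already consumes.

```python
# Illustrative sketch only; URL date, output path, and ROA values are hypothetical.
from rov import download_csv_xz_and_convert_to_json

# Archived CSV for a day before 2023-10-01, i.e. before output.json.xz is published
url = 'https://ftp.ripe.net/rpki/arin.tal/2023/01/06/roas.csv.xz'
download_csv_xz_and_convert_to_json(url, '/tmp/arin.json', 'arin.json')

# /tmp/arin.json now holds the same structure as export.json, e.g.:
# {
#   "roas": [
#     {"asn": "AS13335", "prefix": "104.16.0.0/12", "maxLength": "12", "ta": "arin"},
#     ...
#   ]
# }
```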