diff --git a/Cargo.toml b/Cargo.toml index da345bc..1f93727 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -47,6 +47,7 @@ itertools = { version = "0.12.0", optional = true } dotenvy = { version = "0.15", optional = true } tempfile = { version = "3.8", optional = true } which = { version = "5.0", optional = true } +bgpkit-commons = { version = "0.5", optional = true } # crawler dependencies futures = { version = "0.3", optional = true } @@ -73,6 +74,7 @@ default = [] cli = [ # command-line interface "clap", "dirs", "humantime", "num_cpus", "tracing-subscriber", "tabled", "itertools", "dotenvy", "tempfile", "which", + "bgpkit-commons", # crawler "futures", "oneio", "regex", "scraper", "tokio", "lazy_static", # notification diff --git a/README.md b/README.md index 1d6700b..9889f5f 100644 --- a/README.md +++ b/README.md @@ -9,27 +9,44 @@ [crates-badge]: https://img.shields.io/crates/v/bgpkit-broker.svg + [crates-url]: https://crates.io/crates/bgpkit-broker + [mit-badge]: https://img.shields.io/badge/license-MIT-blue.svg + [mit-url]: https://github.com/bgpkit/bgpkit-broker/blob/main/LICENSE + [twitter-badge]: https://shields.io/badge/Follow-lightgrey?logo=twitter&style=social + [twitter-url]: https://twitter.com/bgpkit + [mastodon-url]: https://infosec.exchange/@bgpkit + [mastodon-badge]: https://img.shields.io/mastodon/follow/109852506691103147?domain=https%3A%2F%2Finfosec.exchange&style=social -[BGPKIT Broker](https://bgpkit.com/broker) is an online data API service that allows users to search for publicly available BGP archive files by time, collector, project, or data type. The service indexes the archives in close to real-time (delay is less than 5 minutes). Currently, we are indexing BGP table dump and updates files from [RIPE RIS][ripe-ris] and [RouteViews][route-views]. +[BGPKIT Broker](https://bgpkit.com/broker) is an online data API service that allows users to search for publicly +available BGP archive files by time, collector, project, or data type. 
The service indexes the archives in close to +real-time (delay is less than 5 minutes). Currently, we are indexing BGP table dump and updates files +from [RIPE RIS][ripe-ris] and [RouteViews][route-views]. [ripe-ris]: https://www.ripe.net/analyse/internet-measurements/routing-information-service-ris/ris-data-access/mrt-files-store + [route-views]: http://archive.routeviews.org/ -This Rust library provides SDK access to the BGPKIT Broker API as well as a command-line tool to start a self-hosted broker instance. +This Rust library provides SDK access to the BGPKIT Broker API as well as a command-line tool to start a self-hosted +broker instance. Current BGPKIT Broker API is available at . -BGPKIT Broker is used in production at [Cloudflare Radar][radar] powering its [routing page][routing] and projects like [BGP hijack detection]() and [route leak detection](https://blog.cloudflare.com/route-leak-detection-with-cloudflare-radar/). +BGPKIT Broker is used in production at [Cloudflare Radar][radar] powering its [routing page][routing] and projects +like [BGP hijack detection](https://blog.cloudflare.com/bgp-hijack-detection/) +and [route leak detection](https://blog.cloudflare.com/route-leak-detection-with-cloudflare-radar/). [radar]: https://radar.cloudflare.com/ + [route-leak]: https://blog.cloudflare.com/route-leak-detection-with-cloudflare-radar/ + [hijack]: https://blog.cloudflare.com/bgp-hijack-detection/ + [routing]: https://blog.cloudflare.com/radar-routing/ ## Broker Rust SDK @@ -37,8 +54,9 @@ BGPKIT Broker is used in production at [Cloudflare Radar][radar] powering its [r ### Usage Add the following dependency line to your project's `Cargo.toml` file: + ```yaml -bgpkit-broker = "0.7.0-beta.1" +bgpkit-broker = "0.7.0-beta.5" ``` ### Example @@ -68,22 +86,26 @@ pub fn main() { ## `bgpkit-broker` CLI Tool -`bgpkit-broker` is a command-line application that packages many functionalities to allow users to self-host a BGPKIT Broker instance with ease. 
+`bgpkit-broker` is a command-line application that packages many functionalities to allow users to self-host a BGPKIT +Broker instance with ease. ### Install -Install with `cargo install bgpkit-broker@0.7.0-beta.5 --features cli` or check out the main branch and run `cargo install --path . --features cli`. +Install with `cargo install bgpkit-broker@0.7.0-beta.5 --features cli` or check out the main branch and +run `cargo install --path . --features cli`. If you are on a macOS environment, you can also use homebrew to install the pre-compiled binary (universal): + ``` brew install bgpkit/tap/bgpkit-broker ``` + ### Usage `bgpkit-broker` has the following subcommands: ```text -A library and command-line to provide indexing and searching functionalities for public BGP data archive files over time. + A library and command-line to provide indexing and searching functionalities for public BGP data archive files over time. Usage: bgpkit-broker [OPTIONS] @@ -95,6 +117,8 @@ Commands: backup Backup Broker database search Search MRT files in Broker db latest Display latest MRT files indexed + live Streaming live from a broker NATS server + doctor Check broker instance health and missing collectors help Print this message or the help of the given subcommand(s) Options: @@ -106,78 +130,98 @@ Options: #### `serve` -`bgpkit-broker serve` is the main command to start the BGPKIT Broker service. It will start a web server that serves the API endpoints. It will also periodically update the local database unless the `--no-update` flag is set. +`bgpkit-broker serve` is the main command to start the BGPKIT Broker service. It will start a web server that serves the +API endpoints. It will also periodically update the local database unless the `--no-update` flag is set. 
```text -Serve the Broker content via RESTful API + Serve the Broker content via RESTful API -Usage: bgpkit-broker serve [OPTIONS] +Usage: bgpkit-broker serve [OPTIONS] + +Arguments: + broker db file location Options: -i, --update-interval update interval in seconds [default: 300] --no-log disable logging - --bootstrap-parquet bootstrap from parquet file instead of DuckDB file + -b, --bootstrap bootstrap the database if it does not exist + --env + -s, --silent disable bootstrap progress bar -h, --host host address [default: 0.0.0.0] -p, --port port number [default: 40064] -r, --root root path, useful for configuring docs UI [default: /] --no-update disable updater service --no-api disable API service - --full-bootstrap do a full database bootstrap from duckdb or parquet file -h, --help Print help -V, --version Print version ``` #### `update` -`bgpkit-broker update` triggers a local database update manually. This command **cannot** be run at the same time as `serve` because the active API will lock the database file. +`bgpkit-broker update` triggers a local database update manually. This command **cannot** be run at the same time +as `serve` because the active API will lock the database file. ```text Update the Broker database -Usage: bgpkit-broker update [OPTIONS] +Usage: bgpkit-broker update [OPTIONS] + +Arguments: + broker db file location Options: - --no-log disable logging - --bootstrap-parquet bootstrap from parquet file instead of DuckDB file - -h, --help Print help - -V, --version Print version + -d, --days force number of days to look back. by default resume from the latest available data time + --no-log disable logging + --env + -h, --help Print help + -V, --version Print version ``` -#### `backup` -`bgpkit-broker update` runs a database backup and export the database to a duckdb file and a parquet file. This *can* be run while `serve` is running. 
+#### `backup` + +`bgpkit-broker backup` runs a database backup and exports the database to a duckdb file and a parquet file. This *can* be +run while `serve` is running. ```text -Export broker database to parquet file + Backup Broker database + +Usage: bgpkit-broker backup [OPTIONS] -Usage: bgpkit-broker backup [OPTIONS] +Arguments: + source database location + remote database location Options: - --no-log disable logging - --bootstrap-parquet bootstrap from parquet file instead of DuckDB file - -h, --help Print help - -V, --version Print version + -f, --force force writing backup file to existing file if specified + --no-log disable logging + --env + -s, --sqlite-cmd-path specify sqlite3 command path + -h, --help Print help + -V, --version Print version ``` -#### `search` +#### `search` + `bgpkit-broker search` queries for MRT files using the default production API unless specified otherwise. ```text -Search MRT files in Broker db + Search MRT files in Broker db Usage: bgpkit-broker search [OPTIONS] Options: --no-log disable logging -t, --ts-start Start timestamp - --bootstrap-parquet bootstrap from parquet file instead of DuckDB file + --env -T, --ts-end End timestamp + -d, --duration Duration string, e.g. 1 hour -p, --project filter by route collector projects, i.e. `route-views` or `riperis` -c, --collector-id filter by collector IDs, e.g. 'rrc00', 'route-views2. use comma to separate multiple collectors - -d, --data-type filter by data types, i.e. 'update', 'rib' --page page number --page-size page size - -u, --url - -j, --json print out search results in JSON format instead of Markdown table + -d, --data-type filter by data types, i.e. 
'updates', 'rib' --page page number --page-size page size - -u, --url - -j, --json print out search results in JSON format instead of Markdown table + -u, --url Specify broker endpoint + -j, --json Print out search results in JSON format instead of Markdown table -h, --help Print help -V, --version Print version ``` @@ -191,7 +235,7 @@ Options: - use `--json` flag to output to a JSON file instead of a Markdown table ```text -Display latest MRT files indexed + Display latest MRT files indexed Usage: bgpkit-broker latest [OPTIONS] @@ -206,6 +250,59 @@ Options: -V, --version Print version ``` +#### `live` + +Streaming live from a broker NATS server. + +```text +Streaming live from a broker NATS server + +Usage: bgpkit-broker live [OPTIONS] + +Options: + --no-log disable logging + -u, --url URL to NATS server, e.g. nats://localhost:4222. If not specified, will try to read from BGPKIT_BROKER_NATS_URL env variable + --env + -s, --subject Subject to subscribe to, default to public.broker.> + -p, --pretty Pretty print JSON output + -h, --help Print help + -V, --version Print version +``` + +#### `doctor` + +Check broker instance health and missing collectors. + +```text +Check broker instance health and missing collectors + +Usage: bgpkit-broker doctor [OPTIONS] + +Options: + --no-log disable logging + --env + -h, --help Print help + -V, --version Print version +``` + +Example output: + +```text +checking broker instance health... + broker instance at https://api.broker.bgpkit.com/v3 is healthy + +checking for missing collectors... 
+missing the following collectors: +| project | name | country | activated_on | data_url | +|------------|--------------|-----------------|---------------------|----------------------------------------------------| +| routeviews | decix.jhb | Malaysia | 2022-12-20 12:00:00 | http://archive.routeviews.org/decix.jhb/bgpdata | +| routeviews | pacwave.lax | United States | 2023-03-30 12:00:00 | http://archive.routeviews.org/pacwave.lax/bgpdata | +| routeviews | pit.scl | Chile | 2023-08-31 23:45:00 | http://archive.routeviews.org/pit.scl/bgpdata | +| routeviews | amsix.ams | The Netherlands | 2024-02-22 23:20:00 | http://archive.routeviews.org/amsix.ams/bgpdata | +| routeviews | pitmx.qro | Mexico | 2024-02-23 22:15:00 | http://archive.routeviews.org/pitmx.qro/bgpdata | +| routeviews | iraq-ixp.bgw | Iraq | 2024-04-13 00:01:00 | http://archive.routeviews.org/iraq-ixp.bgw/bgpdata | +``` + ## Data Provider If you have publicly available data and want to be indexed BGPKIT Broker service, please send us an email at diff --git a/src/cli/main.rs b/src/cli/main.rs index 80539c5..12f0384 100644 --- a/src/cli/main.rs +++ b/src/cli/main.rs @@ -9,13 +9,16 @@ use bgpkit_broker::notifier::NatsNotifier; use bgpkit_broker::{ crawl_collector, load_collectors, BgpkitBroker, Collector, LocalBrokerDb, DEFAULT_PAGE_SIZE, }; -use chrono::{Duration, Utc}; +use bgpkit_commons::collectors::MrtCollector; +use chrono::{Duration, NaiveDateTime, Utc}; use clap::{Parser, Subcommand}; use futures::StreamExt; +use itertools::Itertools; +use std::collections::{HashMap, HashSet}; use std::path::Path; use std::process::exit; use tabled::settings::Style; -use tabled::Table; +use tabled::{Table, Tabled}; use tokio::runtime::Runtime; use tracing::{debug, error, info}; @@ -196,6 +199,9 @@ enum Commands { #[clap(short, long)] pretty: bool, }, + + /// Check broker instance health and missing collectors + Doctor {}, } fn min_update_interval_check(s: &str) -> Result { @@ -620,5 +626,78 @@ fn main() { } }); } + 
+ Commands::Doctor {} => { + if do_log { + enable_logging(); + } + println!("checking broker instance health..."); + let broker = BgpkitBroker::new(); + if broker.health_check().is_ok() { + println!("\tbroker instance at {} is healthy", broker.broker_url); + } else { + println!( + "\tbroker instance at {} is not available", + broker.broker_url + ); + return; + } + + println!(); + + #[derive(Tabled)] + struct CollectorInfo { + project: String, + name: String, + country: String, + activated_on: NaiveDateTime, + data_url: String, + } + + println!("checking for missing collectors..."); + let latest_items = broker.latest().unwrap(); + let latest_collectors: HashSet = + latest_items.into_iter().map(|i| i.collector_id).collect(); + let all_collectors_map: HashMap = + bgpkit_commons::collectors::get_all_collectors() + .unwrap() + .into_iter() + .map(|c| (c.name.clone(), c)) + .collect(); + + let all_collector_names: HashSet = all_collectors_map + .values() + .map(|c| c.name.clone()) + .collect(); + + // get the difference between the two sets + let missing_collectors: Vec = all_collector_names + .difference(&latest_collectors) + .map(|c| { + // convert to CollectorInfo + let collector = all_collectors_map.get(c).unwrap(); + let country_map = bgpkit_commons::countries::Countries::new().unwrap(); + CollectorInfo { + project: collector.project.to_string(), + name: collector.name.clone(), + country: country_map + .lookup_by_code(&collector.country) + .unwrap() + .name + .clone(), + activated_on: collector.activated_on, + data_url: collector.data_url.clone(), + } + }) + .sorted_by(|a, b| a.activated_on.cmp(&b.activated_on)) + .collect(); + + if missing_collectors.is_empty() { + println!("all collectors are up to date"); + } else { + println!("missing the following collectors:"); + println!("{}", Table::new(missing_collectors).with(Style::markdown())); + } + } } }