From 51d37b6b1a6f375fe1289ef92adf8c2100fc3274 Mon Sep 17 00:00:00 2001 From: Andreas Dzialocha Date: Fri, 25 Aug 2023 12:47:42 +0200 Subject: [PATCH] Improved configuration API with TOML file (#519) * Add announce message type * Add announcement state to peer table * Refactor schema provider as the list of schemas is already filtered * Remove broken test, code will change anyhow soon * Fix test after changing message types * Filter schema ids directly in provider * Make distinction clearer between whitelisting and supporting schema ids * Make clippy happy * Generate new announcement state on first run and when provider updates * Move Announcement struct into separate file * Our own target set should always be valid * Introduce a peer message type for messages outside of replication * Update doc string * Struct and serde for announce messages * It's actually okay to send an empty target set * Check if sync request message has a non empty target set * Add a test for checking against empty target sets * Send announcement state to all peers which are not informed yet * Update peer status on incoming announcement * Rename variable * Calculate intersection between target sets of two peers * Add entry to CHANGELOG.md * Announcements contain the replication protocol version * Silently ignore peers with unsupported protocol version * Test serde for announcement messages * Update routine which gets executed on every established new connection and scheduler beat * Update timestamp when sending announcement to peer * Correctly check if remote target set is valid in sync request * Temporary fix to account for different message types * Fix validating target sets * Update test * Also test receiving external announcement * Improve panic message, fix another test * Better CHANGELOG.md entry * Give announce messages a type as well so serde can distinct the variants * Update tests * Remove data dir, rename fields, introduce schema id enum in config * Introduce new config.toml file * 
Better config field names, rename struct to WildcardOption * New command line arguments, find config file automatically * Fix tests * Pandada * Fix naming of allow lists and database url * Rename to im_a_relay * Make relay singular * Add some todos * Rename to direct_node_addresses * Update config.toml after renaming field * Allow wildcard strings in config.toml, fix issue with clap overriding values * Add a doc string * Remove unnecessary serde code for AllowList * We already generate a path for keys * Always show absolute path of config file, whatever comes * Show basic config and all addresses on startup * Correct print config when empty array * Allow --mdns and --relay-flag args to be used without bool value * Allow use of wildcard strings in --supported-schema-ids argument * Rename to allow-schema-ids config * Make sure to not print doc string in about section * Improve texts * Empty string should lead to empty array * Show warnings to user for some configs * Minor nice change * Remove usage section for now in README.md * Add entry to CHANGELOG.md * Fix merge --- .gitignore | 3 + CHANGELOG.md | 1 + Cargo.lock | 189 ++++++++-- aquadoggo/Cargo.toml | 5 +- aquadoggo/src/config.rs | 124 ++----- aquadoggo/src/http/service.rs | 10 +- aquadoggo/src/lib.rs | 2 +- aquadoggo/src/network/behaviour.rs | 25 +- aquadoggo/src/network/config.rs | 87 +++-- aquadoggo/src/network/mod.rs | 1 + aquadoggo/src/network/service.rs | 33 +- aquadoggo/src/network/utils.rs | 24 ++ aquadoggo/src/node.rs | 14 +- aquadoggo/src/replication/ingest.rs | 6 +- aquadoggo/src/replication/service.rs | 15 +- aquadoggo/src/schema/schema_provider.rs | 54 +-- aquadoggo/src/tests.rs | 8 +- aquadoggo_cli/Cargo.toml | 8 +- aquadoggo_cli/README.md | 65 ---- aquadoggo_cli/config.toml | 175 +++++++++ aquadoggo_cli/example_config.toml | 19 - aquadoggo_cli/src/config.rs | 465 ++++++++++++++++++++++++ aquadoggo_cli/src/key_pair.rs | 15 +- aquadoggo_cli/src/main.rs | 152 ++------ aquadoggo_cli/src/schemas.rs | 18 
- aquadoggo_cli/src/utils.rs | 20 + 26 files changed, 1066 insertions(+), 472 deletions(-) create mode 100644 aquadoggo/src/network/utils.rs create mode 100644 aquadoggo_cli/config.toml delete mode 100644 aquadoggo_cli/example_config.toml create mode 100644 aquadoggo_cli/src/config.rs delete mode 100644 aquadoggo_cli/src/schemas.rs create mode 100644 aquadoggo_cli/src/utils.rs diff --git a/.gitignore b/.gitignore index 7783ef8b4..462c846f4 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,9 @@ target/ debug/ + +# Config files config.toml +!aquadoggo_cli/config.toml # IDE .vscode diff --git a/CHANGELOG.md b/CHANGELOG.md index a248669a8..e37f45b66 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -32,6 +32,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Parse supported schema ids from `config.toml` [#473](https://github.com/p2panda/aquadoggo/pull/473) - Fix relayed connections, add DCUtR Holepunching and reduce CLI args [#502](https://github.com/p2panda/aquadoggo/pull/502) - Announce supported schema ids in network before replication [#515](https://github.com/p2panda/aquadoggo/pull/515) +- Improved configuration API with "config.toml" file, environment vars and command line arguments [#519](https://github.com/p2panda/aquadoggo/pull/519) ### Changed diff --git a/Cargo.lock b/Cargo.lock index 502cb6cc1..aea63f455 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -170,11 +170,9 @@ dependencies = [ "ciborium", "ctor", "deadqueue", - "directories", "dynamic-graphql", "env_logger", "envy", - "exponential-backoff", "futures", "hex", "http", @@ -190,6 +188,7 @@ dependencies = [ "proptest", "proptest-derive", "rand 0.8.5", + "regex", "reqwest", "rstest 0.15.0", "rstest_reuse 0.3.0", @@ -214,10 +213,16 @@ dependencies = [ "anyhow", "aquadoggo", "clap", + "colored", + "directories", "env_logger", + "figment", "hex", "libp2p", + "log", "p2panda-rs", + "path-clean", + "serde", "tempfile", "tokio", "toml", @@ -495,7 +500,7 @@ checksum = 
"0e97ce7de6cf12de5d7226c73f5ba9811622f4db3a5b91b55c53e987e5f91cba" dependencies = [ "proc-macro2 1.0.66", "quote 1.0.31", - "syn 2.0.26", + "syn 2.0.29", ] [[package]] @@ -517,7 +522,7 @@ checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193" dependencies = [ "proc-macro2 1.0.66", "quote 1.0.31", - "syn 2.0.26", + "syn 2.0.29", ] [[package]] @@ -534,7 +539,7 @@ checksum = "a564d521dd56509c4c47480d00b80ee55f7e385ae48db5744c67ad50c92d2ebf" dependencies = [ "proc-macro2 1.0.66", "quote 1.0.31", - "syn 2.0.26", + "syn 2.0.29", ] [[package]] @@ -561,6 +566,12 @@ dependencies = [ "num-traits", ] +[[package]] +name = "atomic" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c59bdb34bc650a32731b31bd8f0829cc15d24a708ee31559e0bb34f2bc320cba" + [[package]] name = "atomic-waker" version = "1.1.1" @@ -919,6 +930,7 @@ dependencies = [ "anstream", "anstyle", "clap_lex", + "once_cell", "strsim", ] @@ -931,7 +943,7 @@ dependencies = [ "heck", "proc-macro2 1.0.66", "quote 1.0.31", - "syn 2.0.26", + "syn 2.0.29", ] [[package]] @@ -946,6 +958,17 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" +[[package]] +name = "colored" +version = "2.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2674ec482fbc38012cf31e6c42ba0177b431a0cb6f15fe40efa5aab1bda516f6" +dependencies = [ + "is-terminal", + "lazy_static", + "windows-sys", +] + [[package]] name = "concurrent-queue" version = "2.2.0" @@ -1248,11 +1271,11 @@ dependencies = [ [[package]] name = "directories" -version = "4.0.1" +version = "5.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f51c5d4ddabd36886dd3e1438cb358cdcb0d7c499cb99cb4ac2e38e18b5cb210" +checksum = "9a49173b84e034382284f27f1af4dcbbd231ffa358c0fe316541a7337f376a35" dependencies = [ - "dirs-sys", + "dirs-sys 0.4.1", ] 
[[package]] @@ -1261,7 +1284,7 @@ version = "4.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ca3aa72a6f96ea37bbc5aa912f6788242832f75369bdfdadcb0e38423f100059" dependencies = [ - "dirs-sys", + "dirs-sys 0.3.7", ] [[package]] @@ -1275,6 +1298,18 @@ dependencies = [ "winapi", ] +[[package]] +name = "dirs-sys" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "520f05a5cbd335fae5a99ff7a6ab8627577660ee5cfd6a94a6a929b52ff0321c" +dependencies = [ + "libc", + "option-ext", + "redox_users", + "windows-sys", +] + [[package]] name = "displaydoc" version = "0.2.4" @@ -1283,7 +1318,7 @@ checksum = "487585f4d0c6655fe74905e2504d8ad6908e4db67f744eb140876906c2f3175d" dependencies = [ "proc-macro2 1.0.66", "quote 1.0.31", - "syn 2.0.26", + "syn 2.0.29", ] [[package]] @@ -1438,15 +1473,6 @@ version = "2.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0206175f82b8d6bf6652ff7d71a1e27fd2e4efde587fd368662814d6ec1d9ce0" -[[package]] -name = "exponential-backoff" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "47f78d87d930eee4b5686a2ab032de499c72bd1e954b84262bb03492a0f932cd" -dependencies = [ - "rand 0.8.5", -] - [[package]] name = "fast_chemail" version = "0.9.6" @@ -1471,6 +1497,20 @@ version = "0.1.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e825f6987101665dea6ec934c09ec6d721de7bc1bf92248e1d5810c8cd636b77" +[[package]] +name = "figment" +version = "0.10.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4547e226f4c9ab860571e070a9034192b3175580ecea38da34fcdb53a018c9a5" +dependencies = [ + "atomic", + "pear", + "serde", + "toml", + "uncased", + "version_check", +] + [[package]] name = "flume" version = "0.10.14" @@ -1581,7 +1621,7 @@ checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72" dependencies = [ "proc-macro2 1.0.66", "quote 
1.0.31", - "syn 2.0.26", + "syn 2.0.29", ] [[package]] @@ -2005,6 +2045,12 @@ dependencies = [ "hashbrown 0.14.0", ] +[[package]] +name = "inlinable_string" +version = "0.1.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8fae54786f62fb2918dcfae3d568594e50eb9b5c25bf04371af6fe7516452fb" + [[package]] name = "instant" version = "0.1.12" @@ -2575,7 +2621,7 @@ dependencies = [ "proc-macro-warning", "proc-macro2 1.0.66", "quote 1.0.31", - "syn 2.0.26", + "syn 2.0.29", ] [[package]] @@ -2700,9 +2746,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.19" +version = "0.4.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b06a4cde4c0f271a446782e3eff8de789548ce57dbc8eca9292c27f4a42004b4" +checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" [[package]] name = "lru" @@ -3052,6 +3098,12 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" +[[package]] +name = "option-ext" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" + [[package]] name = "p2panda-rs" version = "0.7.1" @@ -3153,6 +3205,35 @@ version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "de3145af08024dea9fa9914f381a17b8fc6034dfb00f3a84013f7ff43f29ed4c" +[[package]] +name = "path-clean" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17359afc20d7ab31fdb42bb844c8b3bb1dabd7dcf7e68428492da7f16966fcef" + +[[package]] +name = "pear" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61a386cd715229d399604b50d1361683fe687066f42d56f54be995bc6868f71c" +dependencies = [ + "inlinable_string", + "pear_codegen", + "yansi", +] + +[[package]] +name = "pear_codegen" +version 
= "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da9f0f13dac8069c139e8300a6510e3f4143ecf5259c60b116a9b271b4ca0d54" +dependencies = [ + "proc-macro2 1.0.66", + "proc-macro2-diagnostics", + "quote 1.0.31", + "syn 2.0.29", +] + [[package]] name = "pem" version = "1.1.1" @@ -3198,7 +3279,7 @@ dependencies = [ "pest_meta", "proc-macro2 1.0.66", "quote 1.0.31", - "syn 2.0.26", + "syn 2.0.29", ] [[package]] @@ -3229,7 +3310,7 @@ checksum = "ec2e072ecce94ec471b13398d5402c188e76ac03cf74dd1a975161b23a3f6d9c" dependencies = [ "proc-macro2 1.0.66", "quote 1.0.31", - "syn 2.0.26", + "syn 2.0.29", ] [[package]] @@ -3319,7 +3400,7 @@ checksum = "70550716265d1ec349c41f70dd4f964b4fd88394efe4405f0c1da679c4799a07" dependencies = [ "proc-macro2 1.0.66", "quote 1.0.31", - "syn 2.0.26", + "syn 2.0.29", ] [[package]] @@ -3340,6 +3421,19 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "proc-macro2-diagnostics" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af066a9c399a26e020ada66a034357a868728e72cd426f3adcd35f80d88d88c8" +dependencies = [ + "proc-macro2 1.0.66", + "quote 1.0.31", + "syn 2.0.29", + "version_check", + "yansi", +] + [[package]] name = "prometheus-client" version = "0.21.2" @@ -3976,9 +4070,9 @@ checksum = "b0293b4b29daaf487284529cc2f5675b8e57c61f70167ba415a463651fd6a918" [[package]] name = "serde" -version = "1.0.171" +version = "1.0.185" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30e27d1e4fd7659406c492fd6cfaf2066ba8773de45ca75e855590f856dc34a9" +checksum = "be9b6f69f1dfd54c3b568ffa45c310d6973a5e5148fd40cf515acaf38cf5bc31" dependencies = [ "serde_derive", ] @@ -4015,13 +4109,13 @@ dependencies = [ [[package]] name = "serde_derive" -version = "1.0.171" +version = "1.0.185" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "389894603bd18c46fa56231694f8d827779c0951a667087194cf9de94ed24682" +checksum = 
"dc59dfdcbad1437773485e0367fea4b090a2e0a16d9ffc46af47764536a298ec" dependencies = [ "proc-macro2 1.0.66", "quote 1.0.31", - "syn 2.0.26", + "syn 2.0.29", ] [[package]] @@ -4396,9 +4490,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.26" +version = "2.0.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "45c3457aacde3c65315de5031ec191ce46604304d2446e803d71ade03308d970" +checksum = "c324c494eba9d92503e6f1ef2e6df781e78f6a7705a0202d9801b198807d518a" dependencies = [ "proc-macro2 1.0.66", "quote 1.0.31", @@ -4484,7 +4578,7 @@ checksum = "f1728216d3244de4f14f14f8c15c79be1a7c67867d28d69b719690e2a19fb445" dependencies = [ "proc-macro2 1.0.66", "quote 1.0.31", - "syn 2.0.26", + "syn 2.0.29", ] [[package]] @@ -4557,7 +4651,7 @@ checksum = "630bdcf245f78637c13ec01ffae6187cca34625e8c63150d424b59e55af2675e" dependencies = [ "proc-macro2 1.0.66", "quote 1.0.31", - "syn 2.0.26", + "syn 2.0.29", ] [[package]] @@ -4711,7 +4805,7 @@ checksum = "5f4f31f56159e98206da9efd823404b79b6ef3143b4a7ab76e67b1751b25a4ab" dependencies = [ "proc-macro2 1.0.66", "quote 1.0.31", - "syn 2.0.26", + "syn 2.0.29", ] [[package]] @@ -4830,6 +4924,15 @@ version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eaea85b334db583fe3274d12b4cd1880032beab409c0d774be044d4480ab9a94" +[[package]] +name = "uncased" +version = "0.9.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b9bc53168a4be7402ab86c3aad243a84dd7381d09be0eddc81280c1da95ca68" +dependencies = [ + "version_check", +] + [[package]] name = "unicode-bidi" version = "0.3.13" @@ -5014,7 +5117,7 @@ dependencies = [ "once_cell", "proc-macro2 1.0.66", "quote 1.0.31", - "syn 2.0.26", + "syn 2.0.29", "wasm-bindgen-shared", ] @@ -5048,7 +5151,7 @@ checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b" dependencies = [ "proc-macro2 1.0.66", "quote 1.0.31", - "syn 2.0.26", + "syn 2.0.29", "wasm-bindgen-backend", 
"wasm-bindgen-shared", ] @@ -5328,6 +5431,12 @@ dependencies = [ "static_assertions 1.1.0", ] +[[package]] +name = "yansi" +version = "1.0.0-rc.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1367295b8f788d371ce2dbc842c7b709c73ee1364d30351dd300ec2203b12377" + [[package]] name = "yasmf-hash" version = "0.1.1" @@ -5370,5 +5479,5 @@ checksum = "ce36e65b0d2999d2aafac989fb249189a141aee1f53c612c1f37d72631959f69" dependencies = [ "proc-macro2 1.0.66", "quote 1.0.31", - "syn 2.0.26", + "syn 2.0.29", ] diff --git a/aquadoggo/Cargo.toml b/aquadoggo/Cargo.toml index 022c034ed..9e2953536 100644 --- a/aquadoggo/Cargo.toml +++ b/aquadoggo/Cargo.toml @@ -30,10 +30,7 @@ bs58 = "0.4.0" deadqueue = { version = "0.2.3", default-features = false, features = [ "unlimited", ] } -directories = "4.0.1" dynamic-graphql = "0.7.3" -envy = "0.4.2" -exponential-backoff = "1.2.0" futures = "0.3.23" hex = "0.4.3" http = "0.2.9" @@ -59,6 +56,7 @@ once_cell = "1.18.0" openssl-probe = "0.1.5" p2panda-rs = { version = "0.7.1", features = ["storage-provider"] } rand = "0.8.5" +regex = "1.9.3" serde = { version = "1.0.152", features = ["derive"] } sqlx = { version = "0.6.1", features = [ "any", @@ -87,6 +85,7 @@ async-recursion = "1.0.4" ciborium = "0.2.0" ctor = "0.1.23" env_logger = "0.9.0" +envy = "0.4.2" http = "0.2.9" hyper = "0.14.19" libp2p-swarm-test = "0.2.0" diff --git a/aquadoggo/src/config.rs b/aquadoggo/src/config.rs index 1712a3fed..de7b653f4 100644 --- a/aquadoggo/src/config.rs +++ b/aquadoggo/src/config.rs @@ -1,121 +1,69 @@ // SPDX-License-Identifier: AGPL-3.0-or-later -use std::fs; -use std::path::PathBuf; - -use anyhow::Result; -use directories::ProjectDirs; use p2panda_rs::schema::SchemaId; -use serde::Deserialize; use crate::network::NetworkConfiguration; -/// Data directory name. -const DATA_DIR_NAME: &str = "aquadoggo"; - -/// Filename of default sqlite database. 
-const DEFAULT_SQLITE_NAME: &str = "aquadoggo-node.sqlite3"; - /// Configuration object holding all important variables throughout the application. -/// -/// Each configuration also assures that a data directory exists on the host machine where database -/// files or private keys get persisted. -/// -/// When no custom directory path is set it reads the process environment $XDG_DATA_HOME variable -/// to determine the XDG data directory path which is $HOME/.local/share/aquadoggo on Linux by -/// default. -#[derive(Deserialize, Debug, Clone)] -#[serde(default)] +#[derive(Debug, Clone)] pub struct Configuration { - /// Path to data directory. - pub base_path: Option, + /// URL / connection string to PostgreSQL or SQLite database. + pub database_url: String, - /// Database url (SQLite or PostgreSQL). - pub database_url: Option, - - /// Maximum number of database connections in pool. + /// Maximum number of connections that the database pool should maintain. + /// + /// Be mindful of the connection limits for the database as well as other applications which + /// may want to connect to the same database (or even multiple instances of the same + /// application in high-availability deployments). pub database_max_connections: u32, - /// RPC API HTTP server port. + /// HTTP port, serving the GraphQL API (for example hosted under + /// http://localhost:2020/graphql). This API is used for client-node communication. Defaults to + /// 2020. pub http_port: u16, - /// Network configuration. - pub network: NetworkConfiguration, - - /// Materializer worker pool size. + /// Number of concurrent workers which defines the maximum of materialization tasks which can + /// be worked on simultaneously. + /// + /// Use a higher number if you run your node on a powerful machine with many CPU cores. Lower + /// number for low-energy devices with limited resources. pub worker_pool_size: u32, - /// The ids of schema this node supports. 
+ /// List of schema ids which a node will replicate and expose on the GraphQL API. /// - /// If `None` then the node will support all system schema and any new schema it discovers. - pub supported_schema_ids: Option>, + /// When allowing a schema you automatically opt into announcing, replicating and materializing + /// documents connected to it, supporting applications which are dependent on this data. + pub allow_schema_ids: AllowList, + + /// Network configuration. + pub network: NetworkConfiguration, } impl Default for Configuration { fn default() -> Self { Self { - base_path: None, - database_url: None, + database_url: "sqlite::memory:".into(), database_max_connections: 32, http_port: 2020, - network: NetworkConfiguration::default(), worker_pool_size: 16, - supported_schema_ids: None, + allow_schema_ids: AllowList::Wildcard, + network: NetworkConfiguration::default(), } } } -impl Configuration { - /// Returns the data directory path and creates the folders when not existing. - fn create_data_directory(path: Option) -> Result { - // Use custom data directory path or determine one from host - let base_path = path.unwrap_or_else(|| { - ProjectDirs::from("", "", DATA_DIR_NAME) - .ok_or("Can not determine data directory") - .unwrap() - .data_dir() - .to_path_buf() - }); - - // Create folders when they don't exist yet - fs::create_dir_all(&base_path)?; - - Ok(base_path) - } - - /// Create a new configuration object pulling in the variables from the process environment. - /// This method also assures a data directory exists on the host machine. - pub fn new(path: Option) -> Result { - // Make sure data directory exists - let base_path = Self::create_data_directory(path)?; +/// Set a configuration value to either allow a defined set of elements or to a wildcard (*). +#[derive(Debug, Clone)] +pub enum AllowList { + /// Allow all possible items. 
+ Wildcard, - // Create configuration based on defaults and populate with environment variables - let mut config = envy::from_env::()?; - - // Store data directory path in object - config.base_path = Some(base_path); - - // Set default database url (sqlite) when not given - config.database_url = match config.database_url { - Some(url) => Some(url), - None => { - let mut path = config.base_path.clone().unwrap(); - path.push(DEFAULT_SQLITE_NAME); - Some(format!("sqlite:{}", path.to_str().unwrap())) - } - }; - - Ok(config) - } + /// Allow only a certain set of items. + Set(Vec), } -#[cfg(test)] -impl Configuration { - /// Returns a new configuration object for a node which stores all data temporarily in memory. - pub fn new_ephemeral() -> Self { - Configuration { - database_url: Some("sqlite::memory:".to_string()), - ..Default::default() - } +impl Default for AllowList { + fn default() -> Self { + Self::Wildcard } } diff --git a/aquadoggo/src/http/service.rs b/aquadoggo/src/http/service.rs index 31da4afc8..7a2f40e02 100644 --- a/aquadoggo/src/http/service.rs +++ b/aquadoggo/src/http/service.rs @@ -62,11 +62,19 @@ pub async fn http_service( let builder = if let Ok(builder) = axum::Server::try_bind(&http_address) { builder } else { + println!("HTTP port {http_port} was already taken, try random port instead .."); axum::Server::try_bind(&SocketAddr::new(IpAddr::V4(Ipv4Addr::UNSPECIFIED), 0))? 
}; + let builder = builder.serve(build_server(http_context).into_make_service()); + + let local_address = builder.local_addr(); + println!( + "Go to http://{}/graphql to use GraphQL playground", + local_address + ); + builder - .serve(build_server(http_context).into_make_service()) .with_graceful_shutdown(async { debug!("HTTP service is ready"); if tx_ready.send(()).is_err() { diff --git a/aquadoggo/src/lib.rs b/aquadoggo/src/lib.rs index 2da7bdb3c..2c4eb447f 100644 --- a/aquadoggo/src/lib.rs +++ b/aquadoggo/src/lib.rs @@ -63,7 +63,7 @@ mod test_utils; #[cfg(test)] mod tests; -pub use crate::config::Configuration; +pub use crate::config::{AllowList, Configuration}; pub use crate::network::NetworkConfiguration; pub use node::Node; diff --git a/aquadoggo/src/network/behaviour.rs b/aquadoggo/src/network/behaviour.rs index c30a312b1..ef2cf55cd 100644 --- a/aquadoggo/src/network/behaviour.rs +++ b/aquadoggo/src/network/behaviour.rs @@ -87,16 +87,15 @@ impl P2pandaBehaviour { // Create an identify server behaviour with default configuration if a rendezvous server // address has been provided or the rendezvous server flag is set - let identify = - if network_config.relay_address.is_some() || network_config.relay_server_enabled { - debug!("Identify network behaviour enabled"); - Some(identify::Behaviour::new(identify::Config::new( - format!("{NODE_NAMESPACE}/1.0.0"), - key_pair.public(), - ))) - } else { - None - }; + let identify = if network_config.relay_address.is_some() || network_config.relay_mode { + debug!("Identify network behaviour enabled"); + Some(identify::Behaviour::new(identify::Config::new( + format!("{NODE_NAMESPACE}/1.0.0"), + key_pair.public(), + ))) + } else { + None + }; // Create an mDNS behaviour with default configuration if the mDNS flag is set let mdns = if network_config.mdns { @@ -126,7 +125,7 @@ impl P2pandaBehaviour { // Create a rendezvous server behaviour with default configuration if the rendezvous server // flag is set - let 
rendezvous_server = if network_config.relay_server_enabled { + let rendezvous_server = if network_config.relay_mode { debug!("Rendezvous server network behaviour enabled"); Some(rendezvous::server::Behaviour::new( rendezvous::server::Config::default(), @@ -141,7 +140,7 @@ impl P2pandaBehaviour { // Create a relay server behaviour with default configuration if the relay server flag is // set - let relay_server = if network_config.relay_server_enabled { + let relay_server = if network_config.relay_mode { debug!("Relay server network behaviour enabled"); Some(relay::Behaviour::new( peer_id, @@ -156,7 +155,7 @@ impl P2pandaBehaviour { }; // Create UDP holepunching behaviour (DCUtR) if the flag is set - let dcutr = if network_config.relay_server_enabled || relay_client.is_some() { + let dcutr = if network_config.relay_mode || relay_client.is_some() { Some(dcutr::Behaviour::new(peer_id)) } else { None diff --git a/aquadoggo/src/network/config.rs b/aquadoggo/src/network/config.rs index 214c35217..56b05e371 100644 --- a/aquadoggo/src/network/config.rs +++ b/aquadoggo/src/network/config.rs @@ -1,7 +1,7 @@ // SPDX-License-Identifier: AGPL-3.0-or-later use libp2p::connection_limits::ConnectionLimits; -use libp2p::{Multiaddr, PeerId}; +use libp2p::Multiaddr; use serde::{Deserialize, Serialize}; /// The namespace used by the `identify` network behaviour. @@ -10,35 +10,37 @@ pub const NODE_NAMESPACE: &str = "aquadoggo"; /// Network config for the node. #[derive(Debug, Clone, Deserialize, Serialize)] pub struct NetworkConfiguration { - /// Dial concurrency factor. - /// - /// Number of addresses concurrently dialed for an outbound connection attempt with a single - /// peer. - pub dial_concurrency_factor: u8, - - /// Maximum incoming connections. - pub max_connections_in: u32, + /// QUIC port for node-to-node communication. + pub quic_port: u16, - /// Maximum outgoing connections. 
- pub max_connections_out: u32, + /// Discover peers on the local network via mDNS (over IPv4 only, using port 5353). + pub mdns: bool, - /// Maximum pending incoming connections. + /// List of known node addresses (IP + port) we want to connect to directly. /// - /// A pending connection is one which has been initiated but has not yet received a response. - pub max_connections_pending_in: u32, - - /// Maximum pending outgoing connections. + /// Make sure that nodes mentioned in this list are directly reachable (for example they need + /// to be hosted with a static IP Address). If you need to connect to nodes with changing, + /// dynamic IP addresses or even with nodes behind a firewall or NAT, do not use this field but + /// use at least one relay. + pub direct_node_addresses: Vec, + + /// Set to true if node should also function as a relay. Other nodes can use relays to aid + /// discovery and establishing connectivity. /// - /// A pending connection is one which has been initiated but has not yet received a response. - pub max_connections_pending_out: u32, - - /// Maximum connections per peer (includes outgoing and incoming). - pub max_connections_per_peer: u32, + /// Relays _need_ to be hosted in a way where they can be reached directly, for example with a + /// static IP address through an VPS. + pub relay_mode: bool, - /// mDNS discovery enabled. + /// Address of a peer which can act as a relay/rendezvous server. /// - /// Automatically discover peers on the local network (over IPv4 only, using port 5353). - pub mdns: bool, + /// Relays help discover other nodes on the internet (also known as "rendesvouz" or "bootstrap" + /// server) and help establishing direct p2p connections when node is behind a firewall or NAT + /// (also known as "holepunching"). + /// + /// When a direct connection is not possible the relay will help to redirect the (encrypted) + /// traffic as an intermediary between us and other nodes. 
The node will contact each server + /// and register our IP address for other peers. + pub relay_address: Option, /// Notify handler buffer size. /// @@ -55,22 +57,30 @@ pub struct NetworkConfiguration { /// manager will sleep. pub per_connection_event_buffer_size: usize, - /// The addresses of remote peers to replicate from. - pub remote_peers: Vec, + /// Dial concurrency factor. + /// + /// Number of addresses concurrently dialed for an outbound connection attempt with a single + /// peer. + pub dial_concurrency_factor: u8, + + /// Maximum incoming connections. + pub max_connections_in: u32, - /// QUIC transport port. - pub quic_port: u16, + /// Maximum outgoing connections. + pub max_connections_out: u32, - /// Relay server behaviour enabled. + /// Maximum pending incoming connections. /// - /// Serve as a relay point for peer connections. - pub relay_server_enabled: bool, + /// A pending connection is one which has been initiated but has not yet received a response. + pub max_connections_pending_in: u32, - /// Address of a peer which can act as a relay/rendezvous server. - pub relay_address: Option, + /// Maximum pending outgoing connections. + /// + /// A pending connection is one which has been initiated but has not yet received a response. + pub max_connections_pending_out: u32, - /// Peer id of the relay if known. - pub relay_peer_id: Option, + /// Maximum connections per peer (includes outgoing and incoming). 
+ pub max_connections_per_peer: u32, } impl Default for NetworkConfiguration { @@ -82,14 +92,13 @@ impl Default for NetworkConfiguration { max_connections_pending_in: 8, max_connections_pending_out: 8, max_connections_per_peer: 8, - mdns: false, + mdns: true, + direct_node_addresses: Vec::new(), notify_handler_buffer_size: 128, per_connection_event_buffer_size: 8, quic_port: 2022, + relay_mode: false, relay_address: None, - relay_peer_id: None, - remote_peers: Vec::new(), - relay_server_enabled: false, } } } diff --git a/aquadoggo/src/network/mod.rs b/aquadoggo/src/network/mod.rs index 514c5e9c7..a9b531dd8 100644 --- a/aquadoggo/src/network/mod.rs +++ b/aquadoggo/src/network/mod.rs @@ -8,6 +8,7 @@ mod service; mod shutdown; mod swarm; mod transport; +pub mod utils; pub use config::NetworkConfiguration; pub use peers::{Peer, PeerMessage}; diff --git a/aquadoggo/src/network/service.rs b/aquadoggo/src/network/service.rs index 3e7b76f6a..6b7658482 100644 --- a/aquadoggo/src/network/service.rs +++ b/aquadoggo/src/network/service.rs @@ -19,7 +19,7 @@ use crate::context::Context; use crate::manager::{ServiceReadySender, Shutdown}; use crate::network::behaviour::{Event, P2pandaBehaviour}; use crate::network::config::NODE_NAMESPACE; -use crate::network::{identity, peers, swarm, NetworkConfiguration, ShutdownHandler}; +use crate::network::{identity, peers, swarm, utils, NetworkConfiguration, ShutdownHandler}; /// Network service which handles all networking logic for a p2panda node. /// @@ -41,10 +41,10 @@ pub async fn network_service( let key_pair = identity::to_libp2p_key_pair(&context.key_pair); let local_peer_id = key_pair.public().to_peer_id(); - info!("Local peer id: {local_peer_id}"); + println!("Peer id: {local_peer_id}"); // The swarm can be initiated with or without "relay" capabilities. 
- let mut swarm = if network_config.relay_server_enabled { + let mut swarm = if network_config.relay_mode { info!("Networking service initializing with relay capabilities..."); swarm::build_relay_swarm(&network_config, key_pair).await? } else { @@ -74,6 +74,10 @@ pub async fn network_service( .with(Protocol::from(Ipv4Addr::UNSPECIFIED)) .with(Protocol::Udp(0)) .with(Protocol::QuicV1); + println!( + "QUIC port {} was already taken, try random port instead ..", + network_config.quic_port + ); swarm.listen_on(random_port_addr)?; } @@ -104,14 +108,14 @@ pub async fn connect_to_relay( swarm.behaviour_mut().peers.disable(); // Connect to the relay server. Not for the reservation or relayed connection, but to (a) learn - // our local public address and (b) enable a freshly started relay to learn its public - // address. + // our local public address and (b) enable a freshly started relay to learn its public address. swarm.dial(relay_address.clone())?; // Wait to get confirmation that we told the relay node it's public address and that they told // us ours. let mut learned_observed_addr = false; let mut told_relay_observed_addr = false; + let mut learned_relay_peer_id: Option = None; loop { match swarm.next().await.unwrap() { @@ -138,7 +142,7 @@ pub async fn connect_to_relay( relay_address.push(Protocol::P2p(peer_id)); // Update values on the config. - network_config.relay_peer_id = Some(peer_id); + learned_relay_peer_id = Some(peer_id); network_config.relay_address = Some(relay_address.clone()); // All done, we've learned our external address successfully. @@ -153,9 +157,7 @@ pub async fn connect_to_relay( } // We know the relays peer address was learned in the above step so we unwrap it here. 
- let relay_peer_id = network_config - .relay_peer_id - .expect("Received relay peer id"); + let relay_peer_id = learned_relay_peer_id.expect("Received relay peer id"); // Now we have received our external address, and we know the relay has too, listen on our // relay circuit address. @@ -262,6 +264,7 @@ struct EventLoop { rx: BroadcastStream, network_config: NetworkConfiguration, shutdown_handler: ShutdownHandler, + learned_port: bool, } impl EventLoop { @@ -279,6 +282,7 @@ impl EventLoop { tx, network_config, shutdown_handler, + learned_port: false, } } @@ -308,6 +312,17 @@ impl EventLoop { event = self.swarm.next() => { let event = event.expect("Swarm stream to be infinite"); match event { + SwarmEvent::NewListenAddr { address, .. } => { + if self.learned_port { + continue; + } + + // Only print the QUIC address once + if let Some(address) = utils::to_quic_address(&address) { + println!("Node is listening on 0.0.0.0:{}", address.port()); + self.learned_port = true; + } + } SwarmEvent::Behaviour(Event::Identify(event)) => self.handle_identify_events(&event).await, SwarmEvent::Behaviour(Event::Mdns(event)) => self.handle_mdns_events(&event).await, SwarmEvent::Behaviour(Event::RendezvousClient(event)) => self.handle_rendezvous_client_events(&event).await, diff --git a/aquadoggo/src/network/utils.rs b/aquadoggo/src/network/utils.rs new file mode 100644 index 000000000..5f320d4d0 --- /dev/null +++ b/aquadoggo/src/network/utils.rs @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: AGPL-3.0-or-later + +use std::net::SocketAddr; + +use libp2p::Multiaddr; +use regex::Regex; + +pub fn to_quic_address(address: &Multiaddr) -> Option { + let hay = address.to_string(); + let regex = Regex::new(r"/ip4/(\d+.\d+.\d+.\d+)/udp/(\d+)/quic-v1").unwrap(); + let caps = regex.captures(&hay); + + match caps { + None => None, + Some(caps) => { + let ip_address = caps.get(1).unwrap().as_str(); + let port = caps.get(2).unwrap().as_str(); + let socket = format!("{ip_address}:{port}") + .parse::() 
+ .expect("Tried to convert invalid address"); + Some(socket) + } + } +} diff --git a/aquadoggo/src/node.rs b/aquadoggo/src/node.rs index 027466ad3..3bd70c151 100644 --- a/aquadoggo/src/node.rs +++ b/aquadoggo/src/node.rs @@ -24,14 +24,10 @@ async fn initialize_db(config: &Configuration) -> Result { openssl_probe::init_ssl_cert_env_vars(); // Create database when not existing - create_database(&config.database_url.clone().unwrap()).await?; + create_database(&config.database_url).await?; // Create connection pool - let pool = connection_pool( - &config.database_url.clone().unwrap(), - config.database_max_connections, - ) - .await?; + let pool = connection_pool(&config.database_url, config.database_max_connections).await?; // Run pending migrations run_pending_migrations(&pool).await?; @@ -60,11 +56,11 @@ impl Node { // Initiate the SchemaProvider with all currently known schema from the store. // - // If supported_schema_ids are provided then only schema identified in this list will be - // added to the provider and supported by the node. + // If a list of allowed schema ids is provided then only schema identified in this list + // will be added to the provider and supported by the node. 
let application_schema = store.get_all_schema().await.unwrap(); let schema_provider = - SchemaProvider::new(application_schema, config.supported_schema_ids.clone()); + SchemaProvider::new(application_schema, config.allow_schema_ids.clone()); // Create service manager with shared data between services let context = Context::new(store, key_pair, config, schema_provider); diff --git a/aquadoggo/src/replication/ingest.rs b/aquadoggo/src/replication/ingest.rs index 0aee97444..52bd2b5b9 100644 --- a/aquadoggo/src/replication/ingest.rs +++ b/aquadoggo/src/replication/ingest.rs @@ -49,9 +49,9 @@ impl SyncIngest { let plain_operation = decode_operation(encoded_operation)?; - // If the node has been configured with a whitelist of supported schema ids, check that the - // sent operation follows one of our supported schema - if self.schema_provider.is_whitelist_active() + // If the node has been configured with an allow-list of supported schema ids, check that + // the sent operation follows one of our supported schema + if self.schema_provider.is_allow_list_active() && self .schema_provider .supported_schema_ids() diff --git a/aquadoggo/src/replication/service.rs b/aquadoggo/src/replication/service.rs index 83268e07a..73ba808c2 100644 --- a/aquadoggo/src/replication/service.rs +++ b/aquadoggo/src/replication/service.rs @@ -158,8 +158,8 @@ impl ConnectionManager { /// Returns set of schema ids we are interested in and support on this node. async fn target_set(&self) -> TargetSet { - let supported_schema_ids = self.schema_provider.supported_schema_ids().await; - TargetSet::new(&supported_schema_ids) + let allow_schema_ids = self.schema_provider.supported_schema_ids().await; + TargetSet::new(&allow_schema_ids) } /// Register a new peer connection on the manager. @@ -243,18 +243,18 @@ impl ConnectionManager { // If this is a SyncRequest message first we check if the contained target set matches our // own locally configured one. 
- if let Message::SyncRequest(_, remote_supported_schema_ids) = message.message() { + if let Message::SyncRequest(_, target_set) = message.message() { let local_supported_schema_ids = &self .announcement .as_ref() .expect("Announcement state needs to be set with 'update_announcement'") .supported_schema_ids; - // If this node has been configured with a whitelist of schema ids then we check the + // If this node has been configured with an allow list of schema ids then we check the // target set of the requests matches our own, otherwise we skip this step and accept // any target set. - if self.schema_provider.is_whitelist_active() - && !local_supported_schema_ids.is_valid_set(remote_supported_schema_ids) + if self.schema_provider.is_allow_list_active() + && !local_supported_schema_ids.is_valid_set(target_set) { // If it doesn't match we signal that an error occurred and return at this point. self.on_replication_error(peer, session_id, ReplicationError::UnsupportedTargetSet) @@ -530,6 +530,7 @@ mod tests { }; use crate::schema::SchemaProvider; use crate::test_utils::{test_runner, TestNode}; + use crate::AllowList; use super::ConnectionManager; @@ -619,7 +620,7 @@ mod tests { test_runner(move |node: TestNode| async move { let (tx, mut rx) = broadcast::channel::(10); - let schema_provider = SchemaProvider::new(vec![], Some(vec![])); + let schema_provider = SchemaProvider::new(vec![], AllowList::Set(vec![])); let mut manager = ConnectionManager::new(&schema_provider, &node.context.store, &tx, local_peer_id); manager.update_announcement().await; diff --git a/aquadoggo/src/schema/schema_provider.rs b/aquadoggo/src/schema/schema_provider.rs index 7f67bd807..4eeeeb8f5 100644 --- a/aquadoggo/src/schema/schema_provider.rs +++ b/aquadoggo/src/schema/schema_provider.rs @@ -4,23 +4,25 @@ use std::collections::HashMap; use std::sync::Arc; use anyhow::{bail, Result}; -use log::{debug, info}; +use log::{debug, info, trace}; use p2panda_rs::schema::{Schema, SchemaId, 
SYSTEM_SCHEMAS}; use p2panda_rs::Human; use tokio::sync::broadcast::{channel, Receiver, Sender}; use tokio::sync::Mutex; -/// Provides fast thread-safe access to system and application schemas. +use crate::config::AllowList; + +/// Provides fast access to system and application schemas. /// /// Application schemas can be added and updated. #[derive(Clone, Debug)] pub struct SchemaProvider { - /// In-memory store of registered schemas. + /// In-memory store of registered and materialized schemas. schemas: Arc>>, - /// Optional list of whitelisted schema ids. When set, only these schema ids will be accepted - /// on this node, if not set _all_ schema ids are accepted. - whitelisted_schema_ids: Option>, + /// Optional list of allowed schema ids. When set to a list, only these schema ids will be accepted + /// on this node, if not set _all_ schema ids are accepted (wildcard). + allow_schema_ids: AllowList, /// Sender for broadcast channel informing subscribers about updated schemas. tx: Sender, @@ -28,10 +30,7 @@ pub struct SchemaProvider { impl SchemaProvider { /// Returns a `SchemaProvider` containing the given application schemas and all system schemas. - pub fn new( - application_schemas: Vec, - whitelisted_schema_ids: Option>, - ) -> Self { + pub fn new(application_schemas: Vec, allow_schema_ids: AllowList) -> Self { // Collect all system and application schemas. 
let mut schemas = SYSTEM_SCHEMAS.clone(); schemas.extend(&application_schemas); @@ -42,13 +41,14 @@ impl SchemaProvider { index.insert(schema.id().to_owned(), schema.to_owned()); } - if let Some(schema_ids) = &whitelisted_schema_ids { + // Filter out all unsupported schema ids when list was set + if let AllowList::Set(schema_ids) = &allow_schema_ids { index.retain(|id, _| schema_ids.contains(id)); }; let (tx, _) = channel(64); - debug!( + trace!( "Initialised schema provider:\n- {}", index .values() @@ -59,7 +59,7 @@ impl SchemaProvider { Self { schemas: Arc::new(Mutex::new(index)), - whitelisted_schema_ids, + allow_schema_ids, tx, } } @@ -84,8 +84,8 @@ impl SchemaProvider { /// Returns `true` if a schema was updated or it already existed in it's current state, and /// `false` if it was inserted. pub async fn update(&self, schema: Schema) -> Result { - if let Some(whitelisted_ids) = self.whitelisted_schema_ids.as_ref() { - if !whitelisted_ids.contains(schema.id()) { + if let AllowList::Set(allow_schema_ids) = &self.allow_schema_ids { + if !allow_schema_ids.contains(schema.id()) { bail!("Attempted to add unsupported schema to schema provider"); } }; @@ -114,12 +114,12 @@ impl SchemaProvider { /// Returns a list of all supported schema ids. /// - /// If no whitelist was set it returns the list of all currently known schema ids. If a - /// whitelist was set it directly returns the list itself. + /// If no allow-list was set it returns the list of all currently known schema ids. If an + /// allow-list was set it directly returns the list itself. 
pub async fn supported_schema_ids(&self) -> Vec { - match &self.whitelisted_schema_ids { - Some(schema_ids) => schema_ids.clone(), - None => self + match &self.allow_schema_ids { + AllowList::Set(schema_ids) => schema_ids.clone(), + AllowList::Wildcard => self .all() .await .iter() @@ -128,16 +128,16 @@ impl SchemaProvider { } } - /// Returns true if a whitelist of supported schema ids was provided through user + /// Returns true if an allow-list of supported schema ids was provided through user /// configuration. - pub fn is_whitelist_active(&self) -> bool { - self.whitelisted_schema_ids.is_some() + pub fn is_allow_list_active(&self) -> bool { + matches!(self.allow_schema_ids, AllowList::Set(_)) } } impl Default for SchemaProvider { fn default() -> Self { - Self::new(Vec::new(), None) + Self::new(Vec::new(), AllowList::Wildcard) } } @@ -146,6 +146,8 @@ mod test { use p2panda_rs::schema::{FieldType, Schema, SchemaId, SchemaName}; use p2panda_rs::test_utils::fixtures::random_document_view_id; + use crate::AllowList; + use super::SchemaProvider; #[tokio::test] @@ -194,7 +196,7 @@ mod test { &[("test_field", FieldType::String)], ) .unwrap(); - let provider = SchemaProvider::new(vec![], Some(vec![new_schema_id.clone()])); + let provider = SchemaProvider::new(vec![], AllowList::Set(vec![new_schema_id.clone()])); let result = provider.update(new_schema).await; assert!(result.is_ok()); assert!(!result.unwrap()); @@ -204,7 +206,7 @@ mod test { #[tokio::test] async fn update_unsupported_schemas() { - let provider = SchemaProvider::new(vec![], Some(vec![])); + let provider = SchemaProvider::new(vec![], AllowList::Set(vec![])); let new_schema_id = SchemaId::Application( SchemaName::new("test_schema").unwrap(), random_document_view_id(), diff --git a/aquadoggo/src/tests.rs b/aquadoggo/src/tests.rs index 00ba33122..2aea479f5 100644 --- a/aquadoggo/src/tests.rs +++ b/aquadoggo/src/tests.rs @@ -37,13 +37,7 @@ async fn e2e() { // designed to be "local first" which means they are 
fine if there is currently no internet // connection on your computer. - // Node configuration. - // - // Before even starting the node, we need to configure it a little. We mostly go for the - // default options. The only thing we want to do change is the database config. We want an - // in-memory sqlite database for this test. - - let config = Configuration::new_ephemeral(); + let config = Configuration::default(); let key_pair = KeyPair::new(); // Start the node. diff --git a/aquadoggo_cli/Cargo.toml b/aquadoggo_cli/Cargo.toml index 959217228..a3b36ab74 100644 --- a/aquadoggo_cli/Cargo.toml +++ b/aquadoggo_cli/Cargo.toml @@ -21,11 +21,17 @@ doc = false [dependencies] anyhow = "1.0.62" -clap = { version = "4.1.8", features = ["derive"] } +clap = { version = "4.1.8", features = ["derive", "cargo"] } +colored = "2.0.4" +directories = "5.0.1" env_logger = "0.9.0" +figment = { version = "0.10.10", features = ["toml", "env"] } hex = "0.4.3" libp2p = "0.52.0" +log = "0.4.20" p2panda-rs = "0.7.1" +path-clean = "1.0.1" +serde = { version = "1.0.185", features = ["serde_derive"] } tokio = { version = "1.28.2", features = ["full"] } toml = "0.7.6" diff --git a/aquadoggo_cli/README.md b/aquadoggo_cli/README.md index 9d65535db..6b29ab298 100644 --- a/aquadoggo_cli/README.md +++ b/aquadoggo_cli/README.md @@ -2,71 +2,6 @@ Node server with GraphQL API for the p2panda network. -## Usage - -``` - -d, --data-dir - Path to data folder, $HOME/.local/share/aquadoggo by default on Linux - - -P, --http-port - Port for the http server, 2020 by default - - -q, --quic-port - Port for the QUIC transport, 2022 by default for a relay/rendezvous node - - -r, --remote-node-addresses - URLs of remote nodes to replicate with - - -m, --mdns - Enable mDNS for peer discovery over LAN (using port 5353), false by default - - [possible values: true, false] - - --enable-relay-server - Enable relay server to facilitate peer connectivity, false by default - - --relay-addr - IP address for the relay peer. 
- - eg. --relay-addr "127.0.0.1" - - --relay-port - Port for the relay peer, defaults to expected relay port 2022. - - eg. --relay-port "1234" - - -h, --help - Print help (see a summary with '-h') - - -V, --version - Print version -``` - -## Environment variables - -* `RUST_LOG` Can be set to `warn`, `error`, `info`, `debug`, `trace` for logging. -* `DATABASE_URL` Database url (SQLite, PostgreSQL) (default `sqlite:/aquadoggo-node.sqlite3`). -* `DATABASE_MAX_CONNECTIONS` Maximum number of database connections in pool (default `32`). -* `HTTP_PORT` HTTP server port for GraphQL API (default `2020`). -* `WORKER_POOL_SIZE` Materializer worker pool size (default `16`). - -**Example:** - -```bash -# For all debug logs from `aquadoggo` and external crates -RUST_LOG=debug DATABASE_URL=postgres://postgres:postgres@localhost:5432/db cargo run - -# For compact info logs, only directly coming from `aquadoggo` -RUST_LOG=aquadoggo=info DATABASE_URL=postgres://postgres:postgres@localhost:5432/db cargo run -``` - -## Configuring supported schema - -If a `config.toml` file is present then `aquadoggo` will read `supported_schema_ids` at start-up -and be configured to only replicate and offer a query API for schema identified by the listed ids. - -See `example_config.toml` for further instructions. - ## Development ```bash diff --git a/aquadoggo_cli/config.toml b/aquadoggo_cli/config.toml new file mode 100644 index 000000000..4c9651a58 --- /dev/null +++ b/aquadoggo_cli/config.toml @@ -0,0 +1,175 @@ +# aquadoggo configuration file +# +# 1. Copy this file to the location where a) aquadoggo will run b) in an XDG +# compliant config directory (for example "$HOME/.config/aquadoggo/config.toml" +# on Linux) or c) refer to it using the -c command line argument when running +# aquadoggo. +# 2. Replace the example values with your own desired configuration. All values +# in this template resemble the defaults +# 3. 
Check out our website https://p2panda.org for tutorials or official chat +# for further help finding the right configuration for your p2panda network +# +# NOTE: Paths in this file follow the XDG Base Directory Specification for +# Linux. You might want to adjust these values for your respective operating +# system. + +# ゚・。+☆+。 +# SCHEMAS +# ゚・。+☆+。 + +# List of schema ids which a node will replicate, persist and expose on the +# GraphQL API. +# +# When allowing a schema you automatically opt into announcing, replicating and +# materializing documents connected to it, supporting applications and networks +# which are dependent on this data. +# +# It is recommended to set this list to all schema ids your own application +# should support, including all important system schemas. For example: +# +# allow_schema_ids = [ +# # To discover new schemas, set your node to replicate schema definition +# # documents by including these two built-in schema ids. Your node will now +# # search for and replicate schemas which have been published to the +# # network. +# "schema_definition_v1", +# "schema_field_definition_v1", +# +# # Once you discover new schemas and want to start replicating their +# # documents, then add their schema ids to this list as well. It's also +# # possible to create and load schemas directly onto your node using the +# # tool `fishy`: https://github.com/p2panda/fishy +# "my_interesting_schema_0020a01fe...", +# ] +# +# WARNING: When set to wildcard "*", your node will support _any_ schemas it +# will encounter on the network. This is useful for experimentation and local +# development but _not_ recommended for production settings. +# +allow_schema_ids = "*" + +# ゚・。+☆+。・ +# DATABASE +# ゚・。+☆+。・ + +# URL / connection string to PostgreSQL or SQLite database. +# +# When commented out it will default to an in-memory SQLite database URL. +# +# WARNING: When commented out, no data will be persisted after the node shuts +# down. 
Uncomment this value when running in production as you will otherwise +# lose data. +# +# database_url = "sqlite:$HOME/.local/share/aquadoggo/db.sqlite3" + +# Maximum number of connections that the database pool should maintain. +# +# Be mindful of the connection limits for your database as well as other +# applications which may want to connect to the same database (or even multiple +# instances of the same application in high-availability deployments). +# +database_max_connections = 32 + +# ゚・。+☆+。・ +# WORKERS +# ゚・。+☆+。・ + +# Number of concurrent workers which defines the maximum of materialization +# tasks which can be worked on simultaneously. +# +# Use a higher number if you run your node on a powerful machine with many CPU +# cores. Lower number for low-energy devices with limited resources. +# +worker_pool_size = 16 + +# ゚・。+☆ +# PORTS +# ゚・。+☆ + +# HTTP port, serving the GraphQL API (for example hosted under +# http://localhost:2020/graphql). This API is used for client-node +# communication. Defaults to 2020. +# +# When port is taken the node will automatically pick a random, free port. +# +http_port = 2020 + +# QUIC port for node-node communication and data replication. Defaults to 2022. +# +# When port is taken the node will automatically pick a random, free port. +# +quic_port = 2022 + +# ゚・。+☆+。・ +# IDENTITY +# ゚・。+☆+。・ + +# Path to persist your ed25519 private key file. The key is used to identify +# you towards other nodes during network discovery and replication. This key is +# _not_ used to create and sign data. +# +# If a path is set, a key will be generated newly and stored under this path +# when node starts for the first time. +# +# When commented out or no path is set, your node will generate an ephemeral +# private key on every start up and _not_ persist it. +# +# private_key = "$HOME/.local/share/aquadoggo/private-key.txt" + +# ゚・。+☆+。・゚・。+☆+ +# LOCAL NETWORKS +# ゚・。+☆+。・゚・。+☆+ + +# mDNS to discover other peers on the local network. 
Enabled by default. +# +mdns = true + +# ゚・。+☆ +# NODES +# ゚・。+☆ + +# List of known node addresses (IP + port) we want to connect to directly. +# +# NOTE: Make sure that nodes mentioned in this list are directly reachable (for +# example they need to be hosted with a static IP Address). If you need to +# connect to nodes with changing, dynamic IP addresses or even with nodes +# behind a firewall or NAT, do not use this field but use at least one relay. +# +direct_node_addresses = [ + # "192.0.2.0:2022", + # "192.0.2.2:3000", +] + +# ゚・。+☆ +# RELAY +# ゚・。+☆ + +# Address of a relay. +# +# A relay helps discover other nodes on the internet (also known as +# "rendezvous" or "bootstrap" server) and helps establish direct p2p +# connections when node is behind a firewall or NAT (also known as +# "holepunching"). +# +# When a direct connection is not possible the relay will help to redirect the +# (encrypted) traffic as an intermediary between us and other nodes. The node +# will contact the relay and register our IP address for other peers. +# +# WARNING: This will potentially expose your IP address on the network. Only +# connect to trusted relays or make sure your IP address is hidden via a VPN or +# proxy if you're concerned about leaking your IP. +# +# WARNING: Using a relay will potentially connect you to untrusted / unknown +# nodes with which you will then exchange data. If in doubt, use the list +# of known node addresses instead and only connect to trusted nodes. +# +# relay_address = "192.0.2.16:2022" + +# Set to true if node should also function as a relay. Defaults to false. +# +# Other nodes can use relays to aid discovery and establishing connectivity. +# +# NOTE: Relays _need_ to be hosted in a way where they can be reached directly, +# for example with a static IP address through a VPS. 
+# +relay_mode = false diff --git a/aquadoggo_cli/example_config.toml b/aquadoggo_cli/example_config.toml deleted file mode 100644 index 82c127373..000000000 --- a/aquadoggo_cli/example_config.toml +++ /dev/null @@ -1,19 +0,0 @@ -# Example config.toml file -# -# Copy this file to the location where aquadoggo will be run and rename to `config.toml`. Replace -# the example values with your own desired configuration. - -# List of schema ids which will configure which documents a node will replicate and expose on the -# GraphQL API. -supported_schema_ids = [ - # To discover new schema, set your node to replicate schema definition documents by including these - # two built-in schema ids. Your node will now search for and replicate schemas which have been - # published to the network. - "schema_field_definition_v1", - "schema_definition_v1", - - # Once you discover new schemas and want to start replicating their documents, then add their - # schema ids to this list as well. It's also possible to load schema directly onto your node - # using the tool `fishy`: https://github.com/p2panda/fishy - "my_interesting_schema_0020a01f72a5f28f6a559b4942e3525de2bb2413d05897526fe2250e3b57384983a2", -] diff --git a/aquadoggo_cli/src/config.rs b/aquadoggo_cli/src/config.rs new file mode 100644 index 000000000..795b2b0ce --- /dev/null +++ b/aquadoggo_cli/src/config.rs @@ -0,0 +1,465 @@ +// SPDX-License-Identifier: AGPL-3.0-or-later + +use std::convert::TryFrom; +use std::net::{IpAddr, SocketAddr}; +use std::path::PathBuf; +use std::str::FromStr; + +use anyhow::{anyhow, bail, Result}; +use aquadoggo::{AllowList, Configuration as NodeConfiguration, NetworkConfiguration}; +use clap::{crate_version, Parser}; +use colored::Colorize; +use directories::ProjectDirs; +use figment::providers::{Env, Format, Serialized, Toml}; +use figment::Figment; +use libp2p::multiaddr::Protocol; +use libp2p::Multiaddr; +use p2panda_rs::schema::SchemaId; +use serde::{Deserialize, Deserializer, Serialize, 
Serializer}; + +use crate::utils::absolute_path; + +const WILDCARD: &str = "*"; + +const CONFIG_FILE_NAME: &str = "config.toml"; + +type ConfigFilePath = Option; + +/// Get configuration from 1. .toml file, 2. environment variables and 3. command line arguments +/// (in that order, meaning that later configuration sources take precedence over the earlier +/// ones). +/// +/// Returns a partly unchecked configuration object which results from all of these sources. It +/// still needs to be converted for aquadoggo as it might still contain invalid values. +pub fn load_config() -> Result<(ConfigFilePath, Configuration)> { + // Parse command line arguments first to get optional config file path + let cli = Cli::parse(); + + // Determine if a config file path was provided or if we should look for it in common locations + let config_file_path: ConfigFilePath = match &cli.config { + Some(path) => { + if !path.exists() { + bail!("Config file '{}' does not exist", path.display()); + } + + Some(path.clone()) + } + None => try_determine_config_file_path(), + }; + + let mut figment = Figment::from(Serialized::defaults(Configuration::default())); + if let Some(path) = &config_file_path { + figment = figment.merge(Toml::file(path)); + } + + let config = figment + .merge(Env::raw()) + .merge(Serialized::defaults(cli)) + .extract()?; + + Ok((config_file_path, config)) +} + +/// Configuration derived from command line arguments. +/// +/// All arguments are optional and don't get serialized to Figment when they're None. This is to +/// assure that default values do not overwrite all previous settings, especially when they haven't +/// been set. +#[derive(Parser, Serialize, Debug)] +#[command( + name = "aquadoggo Node", + about = "Node server for the p2panda network", + long_about = None, + version +)] +struct Cli { + /// Path to an optional "config.toml" file for further configuration. 
+ /// + /// When not set the program will try to find a `config.toml` file in the same folder the + /// program is executed in and otherwise in the respective operating system's XDG config + /// directory ("$HOME/.config/aquadoggo/config.toml" on Linux). + #[arg(short = 'c', long, value_name = "PATH")] + #[serde(skip_serializing_if = "Option::is_none")] + config: Option, + + /// List of schema ids which a node will replicate, persist and expose on the GraphQL API. + /// Separate multiple values with a whitespace. Defaults to allow _any_ schemas ("*"). + /// + /// When allowing a schema you automatically opt into announcing, replicating and materializing + /// documents connected to it, supporting applications and networks which are dependent on this + /// data. + /// + /// It is recommended to set this list to all schema ids your own application should support, + /// including all important system schemas. + /// + /// WARNING: When set to wildcard "*", your node will support _any_ schemas it will encounter + /// on the network. This is useful for experimentation and local development but _not_ + /// recommended for production settings. + #[arg(short = 's', long, value_name = "SCHEMA_ID", num_args = 0..)] + #[serde( + skip_serializing_if = "Option::is_none", + serialize_with = "serialize_with_wildcard" + )] + allow_schema_ids: Option>, + + /// URL / connection string to PostgreSQL or SQLite database. Defaults to an in-memory SQLite + /// database. + /// + /// WARNING: By default your node will not persist anything after shutdown. Set a database + /// connection url for production settings to not lose data. + #[arg(short = 'd', long, value_name = "CONNECTION_STRING")] + #[serde(skip_serializing_if = "Option::is_none")] + database_url: Option, + + /// HTTP port for client-node communication, serving the GraphQL API. Defaults to 2020. 
+ #[arg(short = 'p', long, value_name = "PORT")] + #[serde(skip_serializing_if = "Option::is_none")] + http_port: Option, + + /// QUIC port for node-node communication and data replication. Defaults to 2022. + #[arg(short = 'q', long, value_name = "PORT")] + #[serde(skip_serializing_if = "Option::is_none")] + quic_port: Option, + + /// Path to persist your ed25519 private key file. Defaults to an ephemeral key only for this + /// current session. + /// + /// The key is used to identify you towards other nodes during network discovery and + /// replication. This key is _not_ used to create and sign data. + /// + /// If a path is set, a key will be generated newly and stored under this path when node starts + /// for the first time. + /// + /// When no path is set, your node will generate an ephemeral private key on every start up and + /// _not_ persist it. + #[arg(short = 'k', long, value_name = "PATH")] + #[serde(skip_serializing_if = "Option::is_none")] + private_key: Option, + + /// mDNS to discover other peers on the local network. Enabled by default. + #[arg( + short = 'm', + long, + value_name = "BOOL", + default_missing_value = "true", + num_args = 0..=1, + )] + #[serde(skip_serializing_if = "Option::is_none")] + mdns: Option, + + /// List of known node addresses we want to connect to directly. + /// + /// Make sure that nodes mentioned in this list are directly reachable (for example they need + /// to be hosted with a static IP Address). If you need to connect to nodes with changing, + /// dynamic IP addresses or even with nodes behind a firewall or NAT, do not use this field but + /// use at least one relay. + #[arg(short = 'n', long, value_name = "IP:PORT", num_args = 0..)] + #[serde(skip_serializing_if = "Option::is_none")] + direct_node_addresses: Option>, + + /// Address of a relay. 
+ /// + /// A relay helps discover other nodes on the internet (also known as "rendezvous" or + /// "bootstrap" server) and helps establish direct p2p connections when node is behind a + /// firewall or NAT (also known as "holepunching"). + /// + /// WARNING: This will potentially expose your IP address on the network. Only connect to + /// trusted relays or make sure your IP address is hidden via a VPN or proxy if you're + /// concerned about leaking your IP. + #[arg(short = 'r', long, value_name = "IP:PORT")] + #[serde(skip_serializing_if = "Option::is_none")] + relay_address: Option, + + /// Enable if node should also function as a relay. Disabled by default. + /// + /// Other nodes can use relays to aid discovery and establishing connectivity. + /// + /// Relays _need_ to be hosted in a way where they can be reached directly, for example with a + /// static IP address through a VPS. + #[arg( + short = 'e', + long, + value_name = "BOOL", + default_missing_value = "true", + num_args = 0..=1, + )] + #[serde(skip_serializing_if = "Option::is_none")] + relay_mode: Option, +} + +/// Clap converts wildcard symbols from command line arguments (for example --allow-schema-ids +/// "*") into an array, (["*"]), but we need it to be just a string ("*"). +fn serialize_with_wildcard( + list: &Option>, + serializer: S, +) -> std::result::Result +where + S: Serializer, +{ + match list { + Some(list) => { + if list.len() == 1 && list[0] == WILDCARD { + // Wildcard symbol comes in form of an array ["*"], convert it to just a string "*" + serializer.serialize_str(WILDCARD) + } else if list.len() == 1 && list[0].is_empty() { + // Empty string should not lead to [""] but to an empty array [] + Vec::>::new().serialize(serializer) + } else { + list.serialize(serializer) + } + } + None => unreachable!("Serialization is skipped if value is None"), + } +} + +/// Configuration derived from environment variables and .toml file. 
+#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct Configuration { + pub allow_schema_ids: UncheckedAllowList, + pub database_url: String, + pub database_max_connections: u32, + pub worker_pool_size: u32, + pub http_port: u16, + pub quic_port: u16, + pub private_key: Option, + pub mdns: bool, + pub direct_node_addresses: Vec, + pub relay_address: Option, + pub relay_mode: bool, +} + +impl Default for Configuration { + fn default() -> Self { + Self { + allow_schema_ids: UncheckedAllowList::Wildcard, + database_url: "sqlite::memory:".into(), + database_max_connections: 32, + worker_pool_size: 16, + http_port: 2020, + quic_port: 2022, + private_key: None, + mdns: true, + direct_node_addresses: vec![], + relay_address: None, + relay_mode: false, + } + } +} + +impl TryFrom for NodeConfiguration { + type Error = anyhow::Error; + + fn try_from(value: Configuration) -> Result { + // Check if given schema ids are valid + let allow_schema_ids = match value.allow_schema_ids { + UncheckedAllowList::Wildcard => AllowList::::Wildcard, + UncheckedAllowList::Set(str_values) => { + let schema_ids: Result, anyhow::Error> = str_values + .iter() + .map(|str_value| { + SchemaId::from_str(str_value).map_err(|_| { + anyhow!( + "Invalid schema id '{str_value}' found in 'allow_schema_ids' list" + ) + }) + }) + .collect(); + + AllowList::Set(schema_ids?) 
+ } + }; + + Ok(NodeConfiguration { + database_url: value.database_url, + database_max_connections: value.database_max_connections, + http_port: value.http_port, + worker_pool_size: value.worker_pool_size, + allow_schema_ids, + network: NetworkConfiguration { + quic_port: value.quic_port, + mdns: value.mdns, + direct_node_addresses: value + .direct_node_addresses + .into_iter() + .map(to_multiaddress) + .collect(), + relay_mode: value.relay_mode, + relay_address: value.relay_address.map(to_multiaddress), + ..Default::default() + }, + }) + } +} + +fn to_multiaddress(socket_address: SocketAddr) -> Multiaddr { + let mut multiaddr = match socket_address.ip() { + IpAddr::V4(ip) => Multiaddr::from(Protocol::Ip4(ip)), + IpAddr::V6(ip) => Multiaddr::from(Protocol::Ip6(ip)), + }; + multiaddr.push(Protocol::Udp(socket_address.port())); + multiaddr.push(Protocol::QuicV1); + multiaddr +} + +fn try_determine_config_file_path() -> Option { + // Find config file in current folder + let mut current_dir = std::env::current_dir().expect("Could not determine current directory"); + current_dir.push(CONFIG_FILE_NAME); + + // Find config file in XDG config folder + let mut xdg_config_dir: PathBuf = ProjectDirs::from("", "", "aquadoggo") + .expect("Could not determine valid config directory path from operating system") + .config_dir() + .to_path_buf(); + xdg_config_dir.push(CONFIG_FILE_NAME); + + [current_dir, xdg_config_dir] + .iter() + .find(|path| path.exists()) + .cloned() +} + +pub fn print_config(path: ConfigFilePath, config: &NodeConfiguration) -> String { + println!( + r" ██████ ███████ ████ + ████████ ██████ + ██████ ███ + █████ ██ + █ ████ █████ + █ ██████ █ █████ + ██ ████ ███ █████ + █████ ██████ █ + ███████ ██ + █████████ █████████████ + ███████████ █████████ + █████████████████ ████ + ██████ ███████████ ██ + ██████████ █████ █ + █████████ ██ ███ ██ + ██████ █ █ ██ + ██ ██ ███████ ██ + ███████████ ██████ +████████ ████████████ ██████ +████ ██████ ██████████ █ ████ + 
█████████ ████████ ███ ███████ + ████████ ██████ ████████ +█████████ ████████████████████████ ███ +█████████ ██ + " + ); + + println!("{} v{}\n", "aquadoggo".underline(), crate_version!()); + + match path { + Some(path) => { + println!( + "Loading config file from {}", + absolute_path(path).display().to_string().blue() + ); + } + None => { + println!("No config file provided"); + } + } + + println!(); + println!("{}\n", "Configuration".underline()); + + let allow_schema_ids: String = match &config.allow_schema_ids { + AllowList::Set(schema_ids) => { + if schema_ids.is_empty() { + "none (disable replication)".into() + } else { + String::from("\n") + + &schema_ids + .iter() + .map(|id| format!("• {id}")) + .collect::>() + .join("\n") + } + } + AllowList::Wildcard => format!("{WILDCARD} (any schema id)"), + }; + + let database_url = if config.database_url == "sqlite::memory:" { + "memory (data is not persisted)".into() + } else if config.database_url.contains("sqlite:") { + format!("SQLite: {}", config.database_url) + } else { + "PostgreSQL".into() + }; + + let mdns = if config.network.mdns { + "enabled" + } else { + "disabled" + }; + + let relay_mode = if config.network.relay_mode { + "enabled" + } else { + "disabled" + }; + + format!( + r"Allow Schema IDs: {} +Database URL: {} +mDNS: {} +Relay Mode: {} + +Node is ready! +", + allow_schema_ids.blue(), + database_url.blue(), + mdns.blue(), + relay_mode.blue(), + ) +} + +/// Helper struct to deserialize from either a wildcard string "*" or a list of string values. +/// +/// These string values are not checked yet and need to be validated in a succeeding step. 
+#[derive(Debug, Clone)] +pub enum UncheckedAllowList { + Wildcard, + Set(Vec), +} + +impl Serialize for UncheckedAllowList { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + match self { + UncheckedAllowList::Wildcard => serializer.serialize_str(WILDCARD), + UncheckedAllowList::Set(list) => list.serialize(serializer), + } + } +} + +impl<'de> Deserialize<'de> for UncheckedAllowList { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + #[derive(Deserialize)] + #[serde(untagged)] + enum Value { + String(String), + Vec(Vec), + } + + let value = Value::deserialize(deserializer)?; + + match value { + Value::String(str_value) => { + if str_value == WILDCARD { + Ok(UncheckedAllowList::Wildcard) + } else { + Err(serde::de::Error::custom("only wildcard strings allowed")) + } + } + Value::Vec(vec) => Ok(UncheckedAllowList::Set(vec)), + } + } +} diff --git a/aquadoggo_cli/src/key_pair.rs b/aquadoggo_cli/src/key_pair.rs index 68e9f467a..d18775d9f 100644 --- a/aquadoggo_cli/src/key_pair.rs +++ b/aquadoggo_cli/src/key_pair.rs @@ -8,20 +8,14 @@ use std::path::PathBuf; use anyhow::Result; use p2panda_rs::identity::KeyPair; -/// File of the name where the private key will be stored inside. -const KEY_PAIR_FILE_NAME: &str = "private-key"; - /// Returns a new instance of `KeyPair` by either loading the private key from a path or generating /// a new one and saving it in the file system. -pub fn generate_or_load_key_pair(base_path: PathBuf) -> Result { - let mut key_pair_path = base_path; - key_pair_path.push(KEY_PAIR_FILE_NAME); - - let key_pair = if key_pair_path.is_file() { - load_key_pair_from_file(key_pair_path)? +pub fn generate_or_load_key_pair(path: PathBuf) -> Result { + let key_pair = if path.is_file() { + load_key_pair_from_file(path)? 
} else { let key_pair = KeyPair::new(); - save_key_pair_to_file(&key_pair, key_pair_path)?; + save_key_pair_to_file(&key_pair, path)?; key_pair }; @@ -32,7 +26,6 @@ pub fn generate_or_load_key_pair(base_path: PathBuf) -> Result { /// file system. /// /// This method is useful to run nodes for testing purposes. -#[allow(dead_code)] pub fn generate_ephemeral_key_pair() -> KeyPair { KeyPair::new() } diff --git a/aquadoggo_cli/src/main.rs b/aquadoggo_cli/src/main.rs index 2c20c011a..87039d9de 100644 --- a/aquadoggo_cli/src/main.rs +++ b/aquadoggo_cli/src/main.rs @@ -1,130 +1,56 @@ // SPDX-License-Identifier: AGPL-3.0-or-later -#![allow(clippy::uninlined_format_args)] +mod config; mod key_pair; -mod schemas; +mod utils; -use std::convert::{TryFrom, TryInto}; -use std::fs::File; -use std::net::IpAddr; +use std::convert::TryInto; -use anyhow::Result; -use aquadoggo::{Configuration, NetworkConfiguration, Node}; -use clap::Parser; -use libp2p::multiaddr::Protocol; -use libp2p::Multiaddr; +use anyhow::Context; +use aquadoggo::{AllowList, Node}; +use log::warn; -const CONFIG_FILE_PATH: &str = "config.toml"; - -#[derive(Parser, Debug)] -#[command(name = "aquadoggo Node", version)] -/// Node server for the p2panda network. -struct Cli { - /// Path to data folder, $HOME/.local/share/aquadoggo by default on Linux. - #[arg(short, long)] - data_dir: Option, - - /// Port for the http server, 2020 by default. - #[arg(short = 'P', long)] - http_port: Option, - - /// Port for the QUIC transport, 2022 by default for a relay/rendezvous node. - #[arg(short, long)] - quic_port: Option, - - /// URLs of remote nodes to replicate with. - #[arg(short, long)] - remote_node_addresses: Vec, - - /// Enable mDNS for peer discovery over LAN (using port 5353), false by default. - #[arg(short, long)] - mdns: Option, - - /// Enable relay server to facilitate peer connectivity, false by default. - #[arg(long)] - enable_relay_server: bool, - - /// IP address for the relay peer. - /// - /// eg. 
--relay-address "127.0.0.1" - #[arg(long)] - relay_address: Option, - - /// Port for the relay peer, defaults to expected relay port 2022. - /// - /// eg. --relay-port "1234" - #[arg(long)] - relay_port: Option, -} - -impl TryFrom for Configuration { - type Error = anyhow::Error; - - fn try_from(cli: Cli) -> Result { - let mut config = Configuration::new(cli.data_dir)?; - - let relay_address = if let Some(relay_address) = cli.relay_address { - let mut multiaddr = match relay_address { - IpAddr::V4(ip) => Multiaddr::from(Protocol::Ip4(ip)), - IpAddr::V6(ip) => Multiaddr::from(Protocol::Ip6(ip)), - }; - multiaddr.push(Protocol::Udp(cli.relay_port.unwrap_or(2022))); - multiaddr.push(Protocol::QuicV1); - - Some(multiaddr) - } else { - None - }; - - if let Some(http_port) = cli.http_port { - config.http_port = http_port; - } - - config.network = NetworkConfiguration { - mdns: cli.mdns.unwrap_or(false), - relay_server_enabled: cli.enable_relay_server, - relay_address, - remote_peers: cli.remote_node_addresses, - ..config.network - }; - - if let Some(quic_port) = cli.quic_port { - config.network.quic_port = quic_port; - } - - Ok(config) - } -} +use crate::config::{load_config, print_config}; +use crate::key_pair::{generate_ephemeral_key_pair, generate_or_load_key_pair}; #[tokio::main] -async fn main() { +async fn main() -> anyhow::Result<()> { env_logger::init(); - // Parse command line arguments - let cli = Cli::parse(); - - // Load configuration parameters and apply defaults - let mut config: Configuration = cli.try_into().expect("Could not load configuration"); - - // Read schema ids from config.toml file or - let supported_schemas = match File::open(CONFIG_FILE_PATH) { - Ok(mut file) => Some( - schemas::read_schema_ids_from_file(&mut file) - .expect("Reading schema ids from config.toml failed"), - ), - Err(_) => None, + // Load configuration from command line arguments, environment variables and .toml file + let (config_file_path, config) = 
load_config().context("Could not load configuration")?; + + // Convert to `aquadoggo` configuration format and check for invalid inputs + let node_config = config + .clone() + .try_into() + .context("Could not load configuration")?; + + // Generate a new key pair, either just for this session or persisted. Folders are + // automatically created when we picked a path + let key_pair = match &config.private_key { + Some(path) => generate_or_load_key_pair(path.clone()) + .context("Could not load private key from file")?, + None => generate_ephemeral_key_pair(), }; - config.supported_schema_ids = supported_schemas; - // We unwrap the path as we know it has been initialised during the conversion step before - let base_path = config.base_path.clone().unwrap(); + // Show configuration info to the user + println!("{}", print_config(config_file_path, &node_config)); - // Generate new key pair or load it from file - let key_pair = - key_pair::generate_or_load_key_pair(base_path).expect("Could not load key pair from file"); + // Show some hopefully helpful warnings + match &node_config.allow_schema_ids { + AllowList::Set(values) => { + if values.is_empty() && !node_config.network.relay_mode { + warn!("Your node was set to not allow any schema ids which is only useful in combination with enabling relay mode. With this setting you will not be able to interact with any client or node."); + } + } + AllowList::Wildcard => { + warn!("Allowed schema ids is set to wildcard. Your node will support _any_ schemas it will encounter on the network. This is useful for experimentation and local development but _not_ recommended for production settings."); + } + } // Start p2panda node in async runtime - let node = Node::start(key_pair, config).await; + let node = Node::start(key_pair, node_config).await; // Run this until [CTRL] + [C] got pressed or something went wrong tokio::select! 
{ @@ -134,4 +60,6 @@ async fn main() { // Wait until all tasks are gracefully shut down and exit node.shutdown().await; + + Ok(()) } diff --git a/aquadoggo_cli/src/schemas.rs b/aquadoggo_cli/src/schemas.rs deleted file mode 100644 index bdddc548d..000000000 --- a/aquadoggo_cli/src/schemas.rs +++ /dev/null @@ -1,18 +0,0 @@ -// SPDX-License-Identifier: AGPL-3.0-or-later - -use std::fs::File; -use std::io::Read; - -use anyhow::{anyhow, Result}; -use p2panda_rs::schema::SchemaId; -use toml::Table; - -pub fn read_schema_ids_from_file(file: &mut File) -> Result> { - let mut buf = String::new(); - file.read_to_string(&mut buf)?; - let table = buf.parse::().unwrap(); - let value = table.get("supported_schema_ids").ok_or(anyhow!( - "No \"supported_schema_ids\" field found config file" - ))?; - Ok(value.clone().try_into::>()?) -} diff --git a/aquadoggo_cli/src/utils.rs b/aquadoggo_cli/src/utils.rs new file mode 100644 index 000000000..477c8047e --- /dev/null +++ b/aquadoggo_cli/src/utils.rs @@ -0,0 +1,20 @@ +// SPDX-License-Identifier: AGPL-3.0-or-later + +use std::env; +use std::path::{Path, PathBuf}; + +use path_clean::PathClean; + +/// Returns the absolute path of a file or directory. +pub fn absolute_path(path: impl AsRef) -> PathBuf { + let path = path.as_ref(); + + if path.is_absolute() { + path.to_path_buf() + } else { + env::current_dir() + .expect("Could not determine current directory") + .join(path) + } + .clean() +}