From e62af1280f8d50ff406fd3debbcec4d14e63ecb2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Erik=20Nordstr=C3=B8m?= Date: Thu, 21 Nov 2024 00:04:09 +0100 Subject: [PATCH] Begin work on implementing BIP39 algorithm. --- Cargo.lock | 106 +++++++++++++++++++++++++++++ Cargo.toml | 2 + crates/pgen/Cargo.toml | 4 ++ crates/pgen/README.md | 69 ++++++++++++++++--- crates/pgen/src/bip39_algorithm.rs | 81 ++++++++++++++++++++++ crates/pgen/src/main.rs | 2 + 6 files changed, 254 insertions(+), 10 deletions(-) create mode 100644 crates/pgen/src/bip39_algorithm.rs diff --git a/Cargo.lock b/Cargo.lock index ad9738e..1c65d63 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -51,6 +51,15 @@ dependencies = [ name = "bip39-lexical-data" version = "1.0.0" +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + [[package]] name = "byteorder" version = "1.5.0" @@ -101,10 +110,49 @@ version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "afb84c814227b90d6895e01398aee0d8033c00e7466aca416fb6a8e0eb19d8a7" +[[package]] +name = "cpufeatures" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ca741a962e1b0bff6d724a1a0958b686406e853bb14061f218562e1896f95e6" +dependencies = [ + "libc", +] + +[[package]] +name = "crypto-common" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +dependencies = [ + "generic-array", + "typenum", +] + +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", +] + [[package]] name = "eff-lexical-data" 
version = "1.0.0" +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + [[package]] name = "getrandom" version = "0.2.15" @@ -167,6 +215,8 @@ dependencies = [ "clap", "eff-lexical-data", "rand", + "sha2", + "test-case", "thiserror", ] @@ -233,6 +283,17 @@ version = "0.1.24" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" +[[package]] +name = "sha2" +version = "0.10.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "793db75ad2bcafc3ffa7c68b215fee268f537982cd901d132f89c6343f3a3dc8" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + [[package]] name = "syn" version = "2.0.87" @@ -244,6 +305,39 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "test-case" +version = "3.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb2550dd13afcd286853192af8601920d959b14c401fcece38071d53bf0768a8" +dependencies = [ + "test-case-macros", +] + +[[package]] +name = "test-case-core" +version = "3.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "adcb7fd841cd518e279be3d5a3eb0636409487998a4aff22f3de87b81e88384f" +dependencies = [ + "cfg-if", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "test-case-macros" +version = "3.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c89e72a01ed4c579669add59014b9a524d609c0c88c6a585ce37485879f6ffb" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "test-case-core", +] + [[package]] name = "thiserror" version = "2.0.3" @@ -264,12 +358,24 @@ dependencies = [ "syn", ] +[[package]] +name = "typenum" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" + [[package]] name = "unicode-ident" version = "1.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe" +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + [[package]] name = "wasi" version = "0.11.0+wasi-snapshot-preview1" diff --git a/Cargo.toml b/Cargo.toml index dad8b86..e24b8eb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,4 +13,6 @@ eff-lexical-data = { path = "crates/eff-lexical-data", version = "1.0.0" } anyhow = { version = "1.0.93", features = ["backtrace"] } clap = { version = "4.5.21", default-features = false, features = ["std", "derive", "help", "usage", "error-context"] } rand = "0.8.5" +sha2 = "0.10.8" +test-case = "3.3.1" thiserror = "2.0.3" diff --git a/crates/pgen/Cargo.toml b/crates/pgen/Cargo.toml index 738c991..f8e35bb 100644 --- a/crates/pgen/Cargo.toml +++ b/crates/pgen/Cargo.toml @@ -15,4 +15,8 @@ bip39-lexical-data = { workspace = true } clap = { workspace = true } eff-lexical-data = { workspace = true } rand = { workspace = true } +sha2 = { workspace = true } thiserror = { workspace = true } + +[dev-dependencies] +test-case = { workspace = true } diff --git a/crates/pgen/README.md b/crates/pgen/README.md index 4bf030c..32a81ef 100644 --- a/crates/pgen/README.md +++ b/crates/pgen/README.md @@ -87,6 +87,7 @@ pgen -V | --version `-w` Specify wordlist to use. * `eff-autocomplete` (default): Use *EFF's Short Wordlist #2* + (EFF's "short word list" with words that have unique three-character prefixes) Features: - Each word has a unique three-character prefix. 
 This means that software could @@ -99,6 +100,7 @@ pgen -V | --version - * `eff-long`: Use *EFF's Long Wordlist* + (EFF's "long word list") Recommended for the creation of memorable passphrases since the increased number of words, as well as the greater effective word length, allows for good entropy with a lower amount @@ -117,6 +119,7 @@ pgen -V | --version - * `eff-short`: Use *EFF's Short Wordlist #1* + (EFF's "general short word list") Features: - Designed to include the 1,296 most memorable and distinct words. @@ -125,7 +128,7 @@ pgen -V | --version - [Deep Dive: EFF's New Wordlists for Random Passphrases][EFFWL] (2016) - -* `bip39`: Use *BIP39* wordlist +* `bip39`: Use *BIP39* English wordlist Details: - [BIP39][BIP39] @@ -153,6 +156,59 @@ your computer to generate "sufficiently random" numbers. `-V`, `--version` Print version information and exit. +## Calculation of entropy + +When calculating the entropy of a password or a passphrase, +[one must assume that the password generation procedure is known to the attacker](https://crypto.stackexchange.com/a/376). +As such, the strength of the passphrases that `pgen` generates is not weakened +in and of itself by the fact that the wordlists we use are publicly known. + +### EFF wordlists + +When one of the EFF wordlists is used, you have the following total number of distinct words +to pick from the respective list: + +- 7776 words in EFF's "long word list" (`eff-long`) +- 1296 words in EFF's "general short word list" (`eff-short`) +- 1296 words in EFF's "short word list" with words that have unique three-character prefixes (`eff-autocomplete`) + +The number of bits of entropy added by each randomly selected word from these EFF wordlists +depends on the total number of words that are in the list we are selecting the words from. 
+ +To calculate the entropy added by each word, we take the binary logarithm of the total number of words in the wordlist: + +- log2(7776) ~= `12.92` bits of entropy added by each randomly selected word from the "long word list". +- log2(1296) ~= `10.34` bits of entropy added by each randomly selected word from one of EFF's short word lists. + +Then: + +- Creating a passphrase consisting of 10 randomly selected words from the "long word list" gives + a passphrase with log2(7776^10) ~= `129.25` bits of entropy. +- Creating a passphrase consisting of 12 randomly selected words from one of EFF's short word lists gives + a passphrase with log2(1296^12) ~= `124.08` bits of entropy. + +### BIP39 English wordlist and BIP39 algorithm + +When using the BIP39 algorithm, the passphrase is derived directly from a sequence of random bits (the entropy), +to which the bits of a checksum are then appended at the end. + +For example, for a BIP39 mnemonic sentence consisting of 12 words, one has to use 128 random bits +followed by 4 checksum bits. + +The checksum bits do not add entropy, nor are any of the initial entropy bits discarded. + +So the entropy of a BIP39 mnemonic sentence is simply the number of random bits +it was generated from in the first place. + +Specifically, BIP39 has five different possible mnemonic sentence lengths, each with +the following corresponding number of bits of entropy: + +- `128` bits of entropy for a BIP39 mnemonic sentence consisting of 12 words. +- `160` bits of entropy for a BIP39 mnemonic sentence consisting of 15 words. +- `192` bits of entropy for a BIP39 mnemonic sentence consisting of 18 words. +- `224` bits of entropy for a BIP39 mnemonic sentence consisting of 21 words. +- `256` bits of entropy for a BIP39 mnemonic sentence consisting of 24 words. + ## How many bits of entropy does your passphrase need? How many bits of entropy should your passphrase consist of? 
@@ -178,22 +234,15 @@ weak hashing algorithms such as MD5 were used, it is the opinion of the author that the neighbourhood of 128 bits of entropy is in fact an appropriate default for such use. -When calculating the entropy of a password or a passphrase, -[one must assume that the password generation procedure is known to the attacker](https://crypto.stackexchange.com/a/376). -Hence with 12 words from either of the short wordlists, each of which -consist of 1296 words, we get a password entropy of log2(1296^12) ~= -124.08 bits. Similarily, with 10 words from the long wordlist (7776 words), -we get a password entropy of log2(7776^10) ~= 129.25 bits. - ## Is a CSPRNG really needed here? Using a CSPRNG ensures uniform distribution of probability. This in turn -ensures that the password entropy calculations are correct. Hence it makes +ensures that the password entropy calculations are correct. Hence, it makes sense to use a CSPRNG. ## See also -* `lastresort`(1) on [crates.io](https://crates.io/crates/base256) / [GitHub](https://github.com/ctsrc/Base256) +* `lastresort`(1) on [crates.io](https://crates.io/crates/base256) or [GitHub](https://github.com/ctsrc/Base256) [EFFWL]: https://www.eff.org/deeplinks/2016/07/new-wordlists-random-passphrases diff --git a/crates/pgen/src/bip39_algorithm.rs b/crates/pgen/src/bip39_algorithm.rs new file mode 100644 index 0000000..6dc632b --- /dev/null +++ b/crates/pgen/src/bip39_algorithm.rs @@ -0,0 +1,81 @@ +use sha2::{Digest, Sha256}; + +/// Calculate BIP39 checksum (CS) bits given entropy bits. 
+fn calculate_cs_bits(ent: &[u8]) -> u8 { + let mut hasher = Sha256::new(); + hasher.update(ent); + let hash = hasher.finalize(); + let shift = match ent.len() { + // 128 bits of entropy (16 bytes) need 4 bits of checksum: the top 4 bits of hash[0] + 16 => 4usize, + // 160 bits of entropy (20 bytes) need 5 bits of checksum: the top 5 bits of hash[0] + 20 => 3, + // 192 bits of entropy (24 bytes) need 6 bits of checksum: the top 6 bits of hash[0] + 24 => 2, + // 224 bits of entropy (28 bytes) need 7 bits of checksum: the top 7 bits of hash[0] + 28 => 1, + // 256 bits of entropy (32 bytes) need 8 bits of checksum: all 8 bits of hash[0] + 32 => 0, + // No other number of bits of entropy aside from the above is supported by BIP39. + // And since this function is internal to our program, and we only intend to call it + // with the supported number of bits of entropy, there really isn't much point in going + // through the extra motions of returning an error since it would mean we have a fatal + // (unrecoverable) error in the coding of our program anyway. So we may as well panic + // via `unreachable!()` instead of returning details about the error. + _ => unreachable!(), + }; + hash[0] >> shift +} + +#[cfg(test)] +mod test { + use crate::bip39_algorithm::calculate_cs_bits; + use test_case::test_case; + + // From the BIP39 reference test vectors (link lost here; presumably trezor/python-mnemonic `vectors.json`, TODO confirm): + // + // ```json + // [ + // "00000000000000000000000000000000", + // "abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon about", + // "c55257c360c07c72029aebc1b53c05ed0362ada38ead3e3e9efa3708e53495531f09a6987599d18264c1e1c92f2cf141630c7a3c4ab7c81b2f001698e7463b04", + // "xprv9s21ZrQH143K3h3fDYiay8mocZ3afhfULfb5GX8kCBdno77K4HiA15Tg23wpbeF1pLfs1c5SPmYHrEpTuuRhxMwvKDwqdKiGJS9XFKzUsAF" + // ], + // ``` + // + // - 128 bits of "entropy" (all zero in this case). + // - The 12th word in the mnemonic sentence is the 4th word (index 3) in the BIP39 English wordlist; with all-zero entropy that index is the expected checksum value. 
+ #[test_case(&[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 3; "128 bits of all zeros")] + // From the BIP39 reference test vectors (link lost here; presumably trezor/python-mnemonic `vectors.json`, TODO confirm): + // + // ```json + // [ + // "000000000000000000000000000000000000000000000000", + // "abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon agent", + // "035895f2f481b1b0f01fcf8c289c794660b289981a78f8106447707fdd9666ca06da5a9a565181599b79f53b844d8a71dd9f439c52a3d7b3e8a79c906ac845fa", + // "xprv9s21ZrQH143K3mEDrypcZ2usWqFgzKB6jBBx9B6GfC7fu26X6hPRzVjzkqkPvDqp6g5eypdk6cyhGnBngbjeHTe4LsuLG1cCmKJka5SMkmU" + // ], + // ``` + // + // - 192 bits of "entropy" (all zero in this case). + // - The 18th word in the mnemonic sentence is the 40th word (index 39) in the BIP39 English wordlist; with all-zero entropy that index is the expected checksum value. + #[test_case(&[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 39; "192 bits of all zeros")] + // From the BIP39 reference test vectors (link lost here; presumably trezor/python-mnemonic `vectors.json`, TODO confirm): + // + // ```json + // [ + // "0000000000000000000000000000000000000000000000000000000000000000", + // "abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon art", + // "bda85446c68413707090a52022edd26a1c9462295029f2e60cd7c4f2bbd3097170af7a4d73245cafa9c3cca8d561a7c3de6f5d4a10be8ed2a5e608d68f92fcc8", + // "xprv9s21ZrQH143K32qBagUJAMU2LsHg3ka7jqMcV98Y7gVeVyNStwYS3U7yVVoDZ4btbRNf4h6ibWpY22iRmXq35qgLs79f312g2kj5539ebPM" + // ], + // ``` + // + // - 256 bits of "entropy" (all zero in this case). + // - The 24th word in the mnemonic sentence is the 103rd word (index 102) in the BIP39 English wordlist; with all-zero entropy that index is the expected checksum value. 
+ #[test_case(&[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 102; "256 bits of all zeros")] + fn calculates_cs_bits_correctly(ent: &[u8], cs_expected: u8) { + let cs_actual = calculate_cs_bits(ent); + assert_eq!(cs_expected, cs_actual); + } +} diff --git a/crates/pgen/src/main.rs b/crates/pgen/src/main.rs index 87854e4..663ceb0 100644 --- a/crates/pgen/src/main.rs +++ b/crates/pgen/src/main.rs @@ -16,6 +16,8 @@ #![forbid(unsafe_code)] +mod bip39_algorithm; + use bip39_lexical_data::WL_BIP39; use clap::{Parser, ValueEnum}; use eff_lexical_data::{WL_AUTOCOMPLETE, WL_LONG, WL_SHORT};