diff --git a/Cargo.toml b/Cargo.toml index 1b23a4e..6202b76 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,6 +7,8 @@ version = "0.1.0" default = ["std", "rocksdb"] rocksdb = ["dep:rocksdb"] std = ["parity-scale-codec/std", "bitvec/std", "starknet-types-core/std"] +# internal +bench = [] [dependencies] bitvec = { version = "1", default-features = false, features = ["alloc"] } @@ -36,6 +38,7 @@ rocksdb = { optional = true, version = "0.21.0", features = [ ] } [dev-dependencies] +pprof = { version = "0.3", features = ["flamegraph"] } pathfinder-common = { git = "https://github.com/massalabs/pathfinder.git", package = "pathfinder-common", rev = "b7b6d76a76ab0e10f92e5f84ce099b5f727cb4db" } pathfinder-crypto = { git = "https://github.com/massalabs/pathfinder.git", package = "pathfinder-crypto", rev = "b7b6d76a76ab0e10f92e5f84ce099b5f727cb4db" } pathfinder-merkle-tree = { git = "https://github.com/massalabs/pathfinder.git", package = "pathfinder-merkle-tree", rev = "b7b6d76a76ab0e10f92e5f84ce099b5f727cb4db" } @@ -43,3 +46,9 @@ pathfinder-storage = { git = "https://github.com/massalabs/pathfinder.git", pack rand = "0.8.5" tempfile = "3.8.0" rstest = "0.18.2" +criterion = "0.5.1" + +[[bench]] +name = "storage" +required-features = ["bench"] +harness = false diff --git a/README.md b/README.md index d8b1d20..4e18fa8 100644 --- a/README.md +++ b/README.md @@ -153,6 +153,13 @@ fn main() { } ``` +## Build and run benchmarks + +This crate uses `rayon` to parallelize hash computations. As such, results will vary depending on the number of cores of your cpu. +``` +cargo bench +``` + ## Acknowledgements - Shout out to [Danno Ferrin](https://github.com/shemnon) and [Karim Taam](https://github.com/matkt) for their work on Bonsai. This project is heavily inspired by their work. 
diff --git a/benches/flamegraph.rs b/benches/flamegraph.rs
new file mode 100644
index 0000000..4fc346b
--- /dev/null
+++ b/benches/flamegraph.rs
@@ -0,0 +1,39 @@
+use criterion::profiler::Profiler;
+use pprof::ProfilerGuard;
+use std::{fs::File, os::raw::c_int, path::Path};
+
+pub struct FlamegraphProfiler<'a> {
+    frequency: c_int,
+    active_profiler: Option<ProfilerGuard<'a>>,
+}
+
+impl<'a> FlamegraphProfiler<'a> {
+    #[allow(dead_code)]
+    pub fn new(frequency: c_int) -> Self {
+        FlamegraphProfiler {
+            frequency,
+            active_profiler: None,
+        }
+    }
+}
+
+impl<'a> Profiler for FlamegraphProfiler<'a> {
+    fn start_profiling(&mut self, _benchmark_id: &str, _benchmark_dir: &Path) {
+        self.active_profiler = Some(ProfilerGuard::new(self.frequency).unwrap());
+    }
+
+    fn stop_profiling(&mut self, _benchmark_id: &str, benchmark_dir: &Path) {
+        std::fs::create_dir_all(benchmark_dir).unwrap();
+        let flamegraph_path = benchmark_dir.join("flamegraph.svg");
+        let flamegraph_file = File::create(&flamegraph_path)
+            .expect("File system error while creating flamegraph.svg");
+        if let Some(profiler) = self.active_profiler.take() {
+            profiler
+                .report()
+                .build()
+                .unwrap()
+                .flamegraph(flamegraph_file)
+                .expect("Error writing flamegraph");
+        }
+    }
+}
diff --git a/benches/storage.rs b/benches/storage.rs
new file mode 100644
index 0000000..373e92f
--- /dev/null
+++ b/benches/storage.rs
@@ -0,0 +1,157 @@
+use std::hint::black_box;
+
+use bitvec::vec::BitVec;
+use bonsai_trie::{
+    databases::HashMapDb,
+    id::{BasicId, BasicIdBuilder},
+    BonsaiStorage, BonsaiStorageConfig,
+};
+use criterion::{criterion_group, criterion_main, Criterion};
+use rand::{prelude::*, thread_rng};
+use starknet_types_core::{
+    felt::Felt,
+    hash::{Pedersen, StarkHash},
+};
+
+mod flamegraph;
+
+fn storage(c: &mut Criterion) {
+    c.bench_function("storage commit", move |b| {
+        let mut bonsai_storage: BonsaiStorage<BasicId, _, Pedersen> = BonsaiStorage::new(
+            HashMapDb::<BasicId>::default(),
+            BonsaiStorageConfig::default(),
+        )
+        .unwrap();
+        let mut rng = thread_rng();
+
+        let felt = Felt::from_hex("0x66342762FDD54D033c195fec3ce2568b62052e").unwrap();
+        for _ in 0..1000 {
+            let bitvec = BitVec::from_vec(vec![
+                rng.gen(),
+                rng.gen(),
+                rng.gen(),
+                rng.gen(),
+                rng.gen(),
+                rng.gen(),
+            ]);
+            bonsai_storage.insert(&[], &bitvec, &felt).unwrap();
+        }
+
+        let mut id_builder = BasicIdBuilder::new();
+        b.iter_batched(
+            || bonsai_storage.clone(),
+            |mut bonsai_storage| {
+                bonsai_storage.commit(id_builder.new_id()).unwrap();
+            },
+            criterion::BatchSize::LargeInput,
+        );
+    });
+}
+
+fn one_update(c: &mut Criterion) {
+    c.bench_function("one update", move |b| {
+        let mut bonsai_storage: BonsaiStorage<BasicId, _, Pedersen> = BonsaiStorage::new(
+            HashMapDb::<BasicId>::default(),
+            BonsaiStorageConfig::default(),
+        )
+        .unwrap();
+        let mut rng = thread_rng();
+
+        let felt = Felt::from_hex("0x66342762FDD54D033c195fec3ce2568b62052e").unwrap();
+        for _ in 0..1000 {
+            let bitvec = BitVec::from_vec(vec![
+                rng.gen(),
+                rng.gen(),
+                rng.gen(),
+                rng.gen(),
+                rng.gen(),
+                rng.gen(),
+            ]);
+            bonsai_storage.insert(&[], &bitvec, &felt).unwrap();
+        }
+
+        let mut id_builder = BasicIdBuilder::new();
+        bonsai_storage.commit(id_builder.new_id()).unwrap();
+
+        b.iter_batched(
+            || bonsai_storage.clone(),
+            |mut bonsai_storage| {
+                let bitvec = BitVec::from_vec(vec![0, 1, 2, 3, 4, 5]);
+                bonsai_storage.insert(&[], &bitvec, &felt).unwrap();
+                bonsai_storage.commit(id_builder.new_id()).unwrap();
+            },
+            criterion::BatchSize::LargeInput,
+        );
+    });
+}
+
+fn five_updates(c: &mut Criterion) {
+    c.bench_function("five updates", move |b| {
+        let mut bonsai_storage: BonsaiStorage<BasicId, _, Pedersen> = BonsaiStorage::new(
+            HashMapDb::<BasicId>::default(),
+            BonsaiStorageConfig::default(),
+        )
+        .unwrap();
+        let mut rng = thread_rng();
+
+        let felt = Felt::from_hex("0x66342762FDD54D033c195fec3ce2568b62052e").unwrap();
+        for _ in 0..1000 {
+            let bitvec = BitVec::from_vec(vec![
+                rng.gen(),
+                rng.gen(),
+                rng.gen(),
+                rng.gen(),
+                rng.gen(),
+                rng.gen(),
+            ]);
+            bonsai_storage.insert(&[], &bitvec, &felt).unwrap();
+        }
+
+        let mut id_builder = BasicIdBuilder::new();
+        bonsai_storage.commit(id_builder.new_id()).unwrap();
+
+        b.iter_batched(
+            || bonsai_storage.clone(),
+            |mut bonsai_storage| {
+                bonsai_storage
+                    .insert(&[], &BitVec::from_vec(vec![0, 1, 2, 3, 4, 5]), &felt)
+                    .unwrap();
+                bonsai_storage
+                    .insert(&[], &BitVec::from_vec(vec![0, 2, 2, 5, 4, 5]), &felt)
+                    .unwrap();
+                bonsai_storage
+                    .insert(&[], &BitVec::from_vec(vec![0, 1, 2, 3, 3, 5]), &felt)
+                    .unwrap();
+                bonsai_storage
+                    .insert(&[], &BitVec::from_vec(vec![0, 1, 1, 3, 99, 3]), &felt)
+                    .unwrap();
+                bonsai_storage
+                    .insert(&[], &BitVec::from_vec(vec![0, 1, 2, 3, 4, 6]), &felt)
+                    .unwrap();
+                bonsai_storage.commit(id_builder.new_id()).unwrap();
+            },
+            criterion::BatchSize::LargeInput,
+        );
+    });
+}
+
+fn hash(c: &mut Criterion) {
+    c.bench_function("pedersen hash", move |b| {
+        let felt0 =
+            Felt::from_hex("0x100bd6fbfced88ded1b34bd1a55b747ce3a9fde9a914bca75571e4496b56443")
+                .unwrap();
+        let felt1 =
+            Felt::from_hex("0x00a038cda302fedbc4f6117648c6d3faca3cda924cb9c517b46232c6316b152f")
+                .unwrap();
+        b.iter(|| {
+            black_box(Pedersen::hash(&felt0, &felt1));
+        })
+    });
+}
+
+criterion_group! {
+    name = benches;
+    config = Criterion::default(); // .with_profiler(flamegraph::FlamegraphProfiler::new(100));
+    targets = storage, one_update, five_updates, hash
+}
+criterion_main!(benches);
diff --git a/src/changes.rs b/src/changes.rs
index 3d9a19c..3018736 100644
--- a/src/changes.rs
+++ b/src/changes.rs
@@ -15,6 +15,7 @@ pub struct Change {
 }
 
 #[derive(Debug, Default)]
+#[cfg_attr(feature = "bench", derive(Clone))]
 pub struct ChangeBatch(pub(crate) HashMap<TrieKey, Change>);
 
 const KEY_SEPARATOR: u8 = 0x00;
@@ -115,6 +116,7 @@ impl ChangeBatch {
     }
 }
 
+#[cfg_attr(feature = "bench", derive(Clone))]
 pub struct ChangeStore<ID>
 where
     ID: Id,
diff --git a/src/key_value_db.rs b/src/key_value_db.rs
index 84ce41f..4e58e49 100644
--- a/src/key_value_db.rs
+++ b/src/key_value_db.rs
@@ -18,6 +18,7 @@ use crate::{
 };
 
 /// Crate Trie <= KeyValueDB => BonsaiDatabase
+#[cfg_attr(feature = "bench", derive(Clone))]
 pub struct KeyValueDB<DB, ID>
 where
     DB: BonsaiDatabase,
diff --git a/src/lib.rs b/src/lib.rs
index 24d4f31..ab3b892 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -168,6 +168,20 @@ where
     tries: MerkleTrees<H, DB, ChangeID>,
 }
 
+#[cfg(feature = "bench")]
+impl<ChangeID, DB, H> Clone for BonsaiStorage<ChangeID, DB, H>
+where
+    DB: BonsaiDatabase + Clone,
+    ChangeID: id::Id,
+    H: StarkHash + Send + Sync,
+{
+    fn clone(&self) -> Self {
+        Self {
+            tries: self.tries.clone(),
+        }
+    }
+}
+
 /// Trie root hash type.
 pub type BonsaiTrieHash = Felt;
diff --git a/src/trie/merkle_tree.rs b/src/trie/merkle_tree.rs
index 2abae46..55b1355 100644
--- a/src/trie/merkle_tree.rs
+++ b/src/trie/merkle_tree.rs
@@ -1,5 +1,5 @@
 #[cfg(not(feature = "std"))]
-use alloc::{format, string::ToString, vec::Vec};
+use alloc::{format, string::ToString, vec, vec::Vec};
 use bitvec::{
     prelude::{BitSlice, BitVec, Msb0},
     view::BitView,
@@ -68,15 +68,25 @@ impl ProofNode {
 
 pub(crate) struct MerkleTrees<H: StarkHash + Send + Sync, DB: BonsaiDatabase, CommitID: Id> {
     pub db: KeyValueDB<DB, CommitID>,
-    _hasher: PhantomData<H>,
     pub trees: HashMap<Vec<u8>, MerkleTree<H>>,
 }
 
+#[cfg(feature = "bench")]
+impl<H: StarkHash + Send + Sync, DB: BonsaiDatabase + Clone, CommitID: Id> Clone
+    for MerkleTrees<H, DB, CommitID>
+{
+    fn clone(&self) -> Self {
+        Self {
+            db: self.db.clone(),
+            trees: self.trees.clone(),
+        }
+    }
+}
+
 impl<H: StarkHash + Send + Sync, DB: BonsaiDatabase, CommitID: Id> MerkleTrees<H, DB, CommitID> {
     pub(crate) fn new(db: KeyValueDB<DB, CommitID>) -> Self {
         Self {
             db,
-            _hasher: PhantomData,
             trees: HashMap::new(),
         }
     }
@@ -184,33 +194,21 @@ impl<H: StarkHash + Send + Sync, DB: BonsaiDatabase, CommitID: Id> MerkleTrees<H
     ) -> Result<(), BonsaiStorageError<DB::DatabaseError>> {
-        #[allow(clippy::type_complexity)]
         #[cfg(not(feature = "std"))]
-        let db_changes: Vec<
-            Result<
-                HashMap<TrieKey, InsertOrRemove<Vec<u8>>>,
-                BonsaiStorageError<DB::DatabaseError>,
-            >,
-        > = self
+        let db_changes = self
             .trees
             .iter_mut()
             .map(|(_, tree)| tree.get_updates::<DB>())
-            .collect();
-        #[allow(clippy::type_complexity)]
+            .collect::<Result<Vec<_>, BonsaiStorageError<DB::DatabaseError>>>()?;
         #[cfg(feature = "std")]
-        let db_changes: Vec<
-            Result<
-                HashMap<TrieKey, InsertOrRemove<Vec<u8>>>,
-                BonsaiStorageError<DB::DatabaseError>,
-            >,
-        > = self
+        let db_changes = self
             .trees
             .par_iter_mut()
             .map(|(_, tree)| tree.get_updates::<DB>())
-            .collect();
+            .collect::<Result<Vec<_>, BonsaiStorageError<DB::DatabaseError>>>()?;
+
         let mut batch = self.db.create_batch();
         for changes in db_changes {
-            let changes = changes?;
             for (key, value) in changes {
                 match value {
                     InsertOrRemove::Insert(value) => {
@@ -270,11 +268,32 @@ pub struct MerkleTree<H: StarkHash> {
     _hasher: PhantomData<H>,
 }
 
-#[derive(Debug, PartialEq, Eq)]
+// NB: #[derive(Clone)] does not work because it expands to an impl block which forces H: Clone, which Pedersen/Poseidon aren't.
+#[cfg(feature = "bench")]
+impl<H: StarkHash> Clone for MerkleTree<H> {
+    fn clone(&self) -> Self {
+        Self {
+            root_handle: self.root_handle.clone(),
+            root_hash: self.root_hash.clone(),
+            identifier: self.identifier.clone(),
+            storage_nodes: self.storage_nodes.clone(),
+            latest_node_id: self.latest_node_id.clone(),
+            death_row: self.death_row.clone(),
+            cache_leaf_modified: self.cache_leaf_modified.clone(),
+            _hasher: PhantomData,
+        }
+    }
+}
+
+#[derive(Clone, Debug, PartialEq, Eq)]
 pub(crate) enum InsertOrRemove<T> {
     Insert(T),
     Remove,
 }
+enum NodeOrFelt<'a> {
+    Node(&'a Node),
+    Felt(Felt),
+}
 
 impl<H: StarkHash + Send + Sync> MerkleTree<H> {
     /// Less visible initialization for `MerkleTree` as the main entry points should be
@@ -340,22 +359,30 @@ impl<H: StarkHash + Send + Sync> MerkleTree<H> {
     #[allow(clippy::type_complexity)]
     pub(crate) fn get_updates<DB: BonsaiDatabase>(
         &mut self,
-    ) -> Result<HashMap<TrieKey, InsertOrRemove<Vec<u8>>>, BonsaiStorageError<DB::DatabaseError>>
+    ) -> Result<Vec<(TrieKey, InsertOrRemove<Vec<u8>>)>, BonsaiStorageError<DB::DatabaseError>>
     {
-        let mut updates = HashMap::new();
+        let mut updates = vec![];
         for node_key in mem::take(&mut self.death_row) {
-            updates.insert(node_key, InsertOrRemove::Remove);
+            updates.push((node_key, InsertOrRemove::Remove));
         }
-        let root_hash =
-            self.commit_subtree::<DB>(&mut updates, self.root_handle, Path(BitVec::new()))?;
+
+        let mut hashes = vec![];
+        self.compute_root_hash::<DB>(&mut hashes)?;
+        let root_hash = self.commit_subtree::<DB>(
+            &mut updates,
+            self.root_handle,
+            Path(BitVec::new()),
+            &mut hashes.drain(..),
+        )?;
+
         for (key, value) in mem::take(&mut self.cache_leaf_modified) {
-            updates.insert(
+            updates.push((
                 TrieKey::new(&self.identifier, TrieKeyType::Flat, &key),
                 match value {
                     InsertOrRemove::Insert(value) => InsertOrRemove::Insert(value.encode()),
                     InsertOrRemove::Remove => InsertOrRemove::Remove,
                 },
-            );
+            ));
         }
         self.latest_node_id.reset();
         self.root_hash = root_hash;
@@ -363,24 +390,147 @@ impl<H: StarkHash + Send + Sync> MerkleTree<H> {
         Ok(updates)
     }
 
+    fn get_node_or_felt<DB: BonsaiDatabase>(
+        &self,
+        node_handle: &NodeHandle,
+    ) -> Result<NodeOrFelt, BonsaiStorageError<DB::DatabaseError>> {
+        let node_id = match node_handle {
+            NodeHandle::Hash(hash) => return Ok(NodeOrFelt::Felt(*hash)),
+            NodeHandle::InMemory(root_id) => root_id,
+        };
+        let node = self
+            .storage_nodes
+            .0
+            .get(node_id)
+            .ok_or(BonsaiStorageError::Trie(
+                "Couldn't fetch node in the temporary storage".to_string(),
+            ))?;
+        Ok(NodeOrFelt::Node(node))
+    }
+
+    fn compute_root_hash<DB: BonsaiDatabase>(
+        &self,
+        hashes: &mut Vec<Felt>,
+    ) -> Result<Felt, BonsaiStorageError<DB::DatabaseError>> {
+        match self.get_node_or_felt::<DB>(&self.root_handle)? {
+            NodeOrFelt::Felt(felt) => Ok(felt),
+            NodeOrFelt::Node(node) => self.compute_hashes::<DB>(node, Path(BitVec::new()), hashes),
+        }
+    }
+
+    /// Compute the hashes of all of the updated nodes in the merkle tree. This step
+    /// is separate from [`commit_subtree`] as it is done in parallel using rayon.
+    /// Computed hashes are pushed to the `hashes` vector, depth first.
+    fn compute_hashes<DB: BonsaiDatabase>(
+        &self,
+        node: &Node,
+        path: Path,
+        hashes: &mut Vec<Felt>,
+    ) -> Result<Felt, BonsaiStorageError<DB::DatabaseError>> {
+        use Node::*;
+
+        match node {
+            Unresolved(hash) => Ok(*hash),
+            Binary(binary) => {
+                // we check if we have one or two changed children
+
+                let left_path = path.new_with_direction(Direction::Left);
+                let node_left = self.get_node_or_felt::<DB>(&binary.left)?;
+                let right_path = path.new_with_direction(Direction::Right);
+                let node_right = self.get_node_or_felt::<DB>(&binary.right)?;
+
+                let (left_hash, right_hash) = match (node_left, node_right) {
+                    #[cfg(feature = "std")]
+                    (NodeOrFelt::Node(left), NodeOrFelt::Node(right)) => {
+                        // two children: use rayon
+                        let (left, right) = rayon::join(
+                            || self.compute_hashes::<DB>(left, left_path, hashes),
+                            || {
+                                let mut hashes = vec![];
+                                let felt =
+                                    self.compute_hashes::<DB>(right, right_path, &mut hashes)?;
+                                Ok::<_, BonsaiStorageError<DB::DatabaseError>>((felt, hashes))
+                            },
+                        );
+                        let (left_hash, (right_hash, hashes2)) = (left?, right?);
+                        hashes.extend(hashes2);
+
+                        (left_hash, right_hash)
+                    }
+                    (left, right) => {
+                        let left_hash = match left {
+                            NodeOrFelt::Felt(felt) => felt,
+                            NodeOrFelt::Node(node) => {
+                                self.compute_hashes::<DB>(node, left_path, hashes)?
+                            }
+                        };
+                        let right_hash = match right {
+                            NodeOrFelt::Felt(felt) => felt,
+                            NodeOrFelt::Node(node) => {
+                                self.compute_hashes::<DB>(node, right_path, hashes)?
+                            }
+                        };
+                        (left_hash, right_hash)
+                    }
+                };
+
+                let hash = H::hash(&left_hash, &right_hash);
+                hashes.push(hash);
+                Ok(hash)
+            }
+
+            Edge(edge) => {
+                let mut child_path = path.clone();
+                child_path.0.extend(&edge.path.0);
+                let child_hash = match self.get_node_or_felt::<DB>(&edge.child)? {
+                    NodeOrFelt::Felt(felt) => felt,
+                    NodeOrFelt::Node(node) => {
+                        self.compute_hashes::<DB>(node, child_path, hashes)?
+                    }
+                };
+
+                let mut bytes = [0u8; 32];
+                bytes.view_bits_mut::<Msb0>()[256 - edge.path.0.len()..]
+                    .copy_from_bitslice(&edge.path.0);
+
+                let felt_path = Felt::from_bytes_be(&bytes);
+                let mut length = [0; 32];
+                // Safe as len() is guaranteed to be <= 251
+                length[31] = edge.path.0.len() as u8;
+
+                let length = Felt::from_bytes_be(&length);
+                let hash = H::hash(&child_hash, &felt_path) + length;
+                hashes.push(hash);
+                Ok(hash)
+            }
+        }
+    }
+
     /// Persists any changes in this subtree to storage.
     ///
     /// This necessitates recursively calculating the hash of, and
     /// in turn persisting, any changed child nodes. This is necessary
     /// as the parent node's hash relies on its children hashes.
+    /// Hash computation is done in parallel with [`compute_hashes`] beforehand.
     ///
     /// In effect, the entire tree gets persisted.
     ///
     /// # Arguments
     ///
-    /// * `node` - The top node from the subtree to commit.
+    /// * `node_handle` - The top node from the subtree to commit.
+    /// * `hashes` - The precomputed hashes for the subtree as returned by [`compute_hashes`].
+    ///   The order is depth first, left to right.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the precomputed `hashes` do not match the length of the modified subtree.
     fn commit_subtree<DB: BonsaiDatabase>(
         &mut self,
-        updates: &mut HashMap<TrieKey, InsertOrRemove<Vec<u8>>>,
+        updates: &mut Vec<(TrieKey, InsertOrRemove<Vec<u8>>)>,
         node_handle: NodeHandle,
         path: Path,
+        hashes: &mut impl Iterator<Item = Felt>,
     ) -> Result<Felt, BonsaiStorageError<DB::DatabaseError>> {
-        use Node::*;
         let node_id = match node_handle {
             NodeHandle::Hash(hash) => return Ok(hash),
             NodeHandle::InMemory(root_id) => root_id,
@@ -393,56 +543,48 @@ impl<H: StarkHash + Send + Sync> MerkleTree<H> {
             .ok_or(BonsaiStorageError::Trie(
                 "Couldn't fetch node in the temporary storage".to_string(),
             ))? {
-            Unresolved(hash) => {
+            Node::Unresolved(hash) => {
                 if path.0.is_empty() {
-                    updates.insert(
+                    updates.push((
                         TrieKey::new(&self.identifier, TrieKeyType::Trie, &[]),
                         InsertOrRemove::Insert(Node::Unresolved(hash).encode()),
-                    );
+                    ));
                     Ok(hash)
                 } else {
                     Ok(hash)
                 }
            }
-            Binary(mut binary) => {
+            Node::Binary(mut binary) => {
                 let left_path = path.new_with_direction(Direction::Left);
-                let left_hash = self.commit_subtree::<DB>(updates, binary.left, left_path)?;
+                let left_hash =
+                    self.commit_subtree::<DB>(updates, binary.left, left_path, hashes)?;
                 let right_path = path.new_with_direction(Direction::Right);
-                let right_hash = self.commit_subtree::<DB>(updates, binary.right, right_path)?;
-                let hash = H::hash(&left_hash, &right_hash);
+                let right_hash =
+                    self.commit_subtree::<DB>(updates, binary.right, right_path, hashes)?;
+                let hash = hashes.next().expect("mismatched hash state");
                 binary.hash = Some(hash);
                 binary.left = NodeHandle::Hash(left_hash);
                 binary.right = NodeHandle::Hash(right_hash);
                 let key_bytes: Vec<u8> = path.into();
-                updates.insert(
+                updates.push((
                     TrieKey::new(&self.identifier, TrieKeyType::Trie, &key_bytes),
                     InsertOrRemove::Insert(Node::Binary(binary).encode()),
-                );
+                ));
                 Ok(hash)
             }
-
-            Edge(mut edge) => {
+            Node::Edge(mut edge) => {
                 let mut child_path = path.clone();
                 child_path.0.extend(&edge.path.0);
-                let child_hash = self.commit_subtree::<DB>(updates, edge.child, child_path)?;
-                let mut bytes = [0u8; 32];
-                bytes.view_bits_mut::<Msb0>()[256 - edge.path.0.len()..]
-                    .copy_from_bitslice(&edge.path.0);
-
-                let felt_path = Felt::from_bytes_be(&bytes);
-                let mut length = [0; 32];
-                // Safe as len() is guaranteed to be <= 251
-                length[31] = edge.path.0.len() as u8;
-
-                let length = Felt::from_bytes_be(&length);
-                let hash = H::hash(&child_hash, &felt_path) + length;
+                let child_hash =
+                    self.commit_subtree::<DB>(updates, edge.child, child_path, hashes)?;
+                let hash = hashes.next().expect("mismatched hash state");
                 edge.hash = Some(hash);
                 edge.child = NodeHandle::Hash(child_hash);
                 let key_bytes: Vec<u8> = path.into();
-                updates.insert(
+                updates.push((
                     TrieKey::new(&self.identifier, TrieKeyType::Trie, &key_bytes),
                     InsertOrRemove::Insert(Node::Edge(edge).encode()),
-                );
+                ));
                 Ok(hash)
             }
        }