Skip to content

Commit

Permalink
Merge branch 'main' into rkuris/readme
Browse files Browse the repository at this point in the history
  • Loading branch information
rkuris committed Dec 4, 2024
2 parents 27ab43f + 60291db commit f91ab8a
Show file tree
Hide file tree
Showing 9 changed files with 658 additions and 114 deletions.
7 changes: 1 addition & 6 deletions RELEASE.md
Original file line number Diff line number Diff line change
@@ -1,15 +1,10 @@
# Releasing firewood

Releasing firewood is straightforward and can be done entirely in CI.

Firewood is made up of several sub-projects in a workspace. Each project is in
its own crate and has an independent version.

* firewood
* storage
* fwdctl
* grpc-testtool

The first step in drafting a release is ensuring all crates within the firewood
project are using the version of the new release. There is a utility to ensure
all versions are updated simultaneously in `cargo-workspace-version`. To use it
Expand Down
14 changes: 12 additions & 2 deletions benchmark/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,19 @@ struct Args {
cache_size: NonZeroUsize,
#[arg(short, long, default_value_t = 128)]
revisions: usize,
#[arg(short = 'p', long, default_value_t = 3000)]
#[arg(
short = 'p',
long,
default_value_t = 3000,
help = "Port to listen for prometheus"
)]
prometheus_port: u16,
#[arg(short = 's', long, default_value_t = false)]
#[arg(
short = 's',
long,
default_value_t = false,
help = "Dump prometheus stats on exit"
)]
stats_dump: bool,

#[clap(flatten)]
Expand Down
5 changes: 2 additions & 3 deletions firewood/benches/hashops.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ use std::{fs::File, iter::repeat_with, os::raw::c_int, path::Path};
use storage::{MemStore, NodeStore};

// To enable flamegraph output
// cargo bench --bench hashops -- --profile-time=N
enum FlamegraphProfiler {
Init(c_int),
Active(ProfilerGuard<'static>),
Expand Down Expand Up @@ -137,8 +137,7 @@ fn bench_db<const N: usize>(criterion: &mut Criterion) {
criterion_group! {
    name = benches;
    config = Criterion::default().with_profiler(FlamegraphProfiler::Init(100));
    // targets = bench_trie_hash, bench_merkle::<3, 32>, bench_db::<100>
    // NOTE(review): turbofish (`::<...>`) is required on every generic target —
    // `criterion_group!` matches targets as `$target:path`, and `bench_merkle<3, 32>`
    // without `::` is not a valid path.
    targets = bench_merkle::<3, 4>, bench_merkle::<3, 32>, bench_db::<100>
}

criterion_main!(benches);
3 changes: 3 additions & 0 deletions storage/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,14 @@ metrics = "0.24.0"
log = { version = "0.4.20", optional = true }
bytemuck = "1.7.0"
bytemuck_derive = "1.7.0"
bitfield = "0.17.0"

[dev-dependencies]
rand = "0.8.5"
test-case = "3.3.1"
criterion = { version = "0.5.1", features = ["async_tokio", "html_reports"] }
pprof = { version = "0.14.0", features = ["flamegraph"] }
tempfile = "3.12.0"

[features]
logger = ["log"]
Expand Down
99 changes: 87 additions & 12 deletions storage/benches/serializer.rs
Original file line number Diff line number Diff line change
@@ -1,31 +1,84 @@
// Copyright (C) 2023, Ava Labs, Inc. All rights reserved.
// See the file LICENSE.md for licensing terms.

use std::{array::from_fn, num::NonZeroU64};
use std::{array::from_fn, fs::File, num::NonZeroU64, os::raw::c_int};

use bincode::Options;
use criterion::{criterion_group, criterion_main, Criterion};
use criterion::{criterion_group, criterion_main, profiler::Profiler, Criterion};
use pprof::ProfilerGuard;
use smallvec::SmallVec;
use storage::{LeafNode, Node, Path};

use std::path::Path as FsPath;

// For flamegraphs:
// cargo bench --bench serializer -- --profile-time=5

/// Criterion profiler hook that emits a pprof flamegraph per benchmark.
/// Starts as `Init` and transitions to `Active` when profiling begins
/// (see the `Profiler` impl below).
enum FlamegraphProfiler {
    /// Not yet profiling; carries the pprof sampling frequency (Hz).
    Init(c_int),
    /// A pprof session is in progress; the guard collects samples until dropped.
    Active(ProfilerGuard<'static>),
}

/// Build a panic handler for file errors on `path`.
///
/// Returns a closure suitable for `Result::unwrap_or_else`: it discards the
/// error value it receives and panics with a message naming the offending path.
fn file_error_panic<T, U>(path: &FsPath) -> impl FnOnce(T) -> U + '_ {
    move |_| {
        let shown = path.display();
        panic!("Error on file `{}`", shown)
    }
}

impl Profiler for FlamegraphProfiler {
    /// Begin a pprof session at the configured frequency.
    /// No-op if profiling is already active.
    #[allow(clippy::unwrap_used)]
    fn start_profiling(&mut self, _benchmark_id: &str, _benchmark_dir: &FsPath) {
        if let Self::Init(frequency) = self {
            let guard = ProfilerGuard::new(*frequency).unwrap();
            *self = Self::Active(guard);
        }
    }

    /// Finish the pprof session and write `firewood-flamegraph.svg`
    /// into `benchmark_dir`.
    #[allow(clippy::unwrap_used)]
    fn stop_profiling(&mut self, _benchmark_id: &str, benchmark_dir: &FsPath) {
        std::fs::create_dir_all(benchmark_dir).unwrap();
        let flamegraph_path = benchmark_dir.join("firewood-flamegraph.svg");

        // Only create the output file once we know a session was started;
        // previously an empty .svg was created even when profiling never ran.
        // The inner #[allow(clippy::unwrap_used)] attributes were redundant:
        // the fn-level allow already covers the whole body.
        if let Self::Active(profiler) = self {
            let flamegraph_file =
                File::create(&flamegraph_path).unwrap_or_else(file_error_panic(&flamegraph_path));
            profiler
                .report()
                .build()
                .unwrap()
                .flamegraph(flamegraph_file)
                .unwrap_or_else(file_error_panic(&flamegraph_path));
        }
    }
}

fn leaf(c: &mut Criterion) {
let mut group = c.benchmark_group("leaf");
let input = Node::Leaf(LeafNode {
partial_path: Path(SmallVec::from_slice(&[0, 1, 2, 3, 4, 5, 6, 7, 8, 9])),
partial_path: Path(SmallVec::from_slice(&[0, 1])),
value: SmallVec::from_slice(&[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
});
let serializer = bincode::DefaultOptions::new().with_varint_encoding();
group.bench_with_input("leaf", &input, |b, input| {
group.bench_with_input("serde", &input, |b, input| {
b.iter(|| {
serializer.serialize(input).unwrap();
})
});

group.bench_with_input("manual", &input, |b, input| {
b.iter(|| {
let mut bytes = Vec::<u8>::new();
input.as_bytes(0, &mut bytes);
})
});
group.finish();
}

fn branch(c: &mut Criterion) {
let mut group = c.benchmark_group("branch");
let mut group = c.benchmark_group("has_value");
let mut input = Node::Branch(Box::new(storage::BranchNode {
partial_path: Path(SmallVec::from_slice(&[0, 1, 2, 3, 4, 5, 6, 7, 8, 9])),
partial_path: Path(SmallVec::from_slice(&[0, 1])),
value: Some(vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9].into_boxed_slice()),
children: from_fn(|i| {
if i == 0 {
Expand All @@ -39,24 +92,46 @@ fn branch(c: &mut Criterion) {
}),
}));
let serializer = bincode::DefaultOptions::new().with_varint_encoding();
let benchfn = |b: &mut criterion::Bencher, input: &storage::Node| {
let serde_serializer = |b: &mut criterion::Bencher, input: &storage::Node| {
b.iter(|| {
serializer.serialize(input).unwrap();
})
};

group.bench_with_input("1_child+has_value", &input, benchfn);
let manual_serializer = |b: &mut criterion::Bencher, input: &storage::Node| {
b.iter(|| {
let mut bytes = Vec::new();
input.as_bytes(0, &mut bytes);
})
};

group.bench_with_input("serde", &input, serde_serializer);
group.bench_with_input("manual", &input, manual_serializer);
group.finish();

let mut group = c.benchmark_group("1_child");
input.as_branch_mut().unwrap().value = None;
group.bench_with_input("1_child", &input, benchfn);
group.bench_with_input("serde", &input, serde_serializer);
group.bench_with_input("manual", &input, manual_serializer);
let child = input.as_branch().unwrap().children[0].clone();
group.finish();

let mut group = c.benchmark_group("2_child");
input.as_branch_mut().unwrap().children[1] = child.clone();
group.bench_with_input("2_child", &input, benchfn);
group.bench_with_input("serde", &input, serde_serializer);
group.bench_with_input("manual", &input, manual_serializer);
group.finish();

let mut group = c.benchmark_group("16_child");
input.as_branch_mut().unwrap().children = std::array::from_fn(|_| child.clone());
group.bench_with_input("16_child", &input, benchfn);
group.bench_with_input("serde", &input, serde_serializer);
group.bench_with_input("manual", &input, manual_serializer);
group.finish();
}

// The stale pre-merge `criterion_group!(serializers, leaf, branch);` line that
// the diff left above the merged form has been dropped; only the version with
// the flamegraph profiler config remains.
criterion_group!(
    name = serializers;
    config = Criterion::default().with_profiler(FlamegraphProfiler::Init(100));
    targets = leaf, branch
);
criterion_main!(serializers);
120 changes: 117 additions & 3 deletions storage/src/linear/filebacked.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,7 @@ impl FileBacked {

impl ReadableStorage for FileBacked {
fn stream_from(&self, addr: u64) -> Result<Box<dyn Read>, Error> {
let mut fd = self.fd.lock().expect("p");
fd.seek(std::io::SeekFrom::Start(addr))?;
Ok(Box::new(fd.try_clone().expect("poisoned lock")))
Ok(Box::new(PredictiveReader::new(self, addr)))
}

fn size(&self) -> Result<u64, Error> {
Expand Down Expand Up @@ -112,3 +110,119 @@ impl WritableStorage for FileBacked {
guard.put(addr, next);
}
}

/// A reader that can predictively read from a file, avoiding reading past boundaries, but reading in 1k chunks
struct PredictiveReader {
    // Private clone of the backing file descriptor; seeking here does not
    // disturb other readers of the same `FileBacked`.
    fd: File,
    // Read-ahead buffer of `PREDICTIVE_READ_BUFFER_SIZE` bytes.
    buffer: [u8; Self::PREDICTIVE_READ_BUFFER_SIZE],
    // Absolute file offset of the next refill (i.e. just past the bytes
    // currently held in `buffer`).
    offset: u64,
    // Number of valid bytes currently in `buffer`.
    len: usize,
    // Consumer position within the valid portion of `buffer`; pos == len
    // means the buffer is exhausted and the next read triggers a refill.
    pos: usize,
}

impl PredictiveReader {
    /// Size of the internal read-ahead buffer, in bytes.
    const PREDICTIVE_READ_BUFFER_SIZE: usize = 1024;

    /// Create a reader positioned at absolute file offset `start`.
    ///
    /// Clones the shared file descriptor under the lock so this reader can
    /// seek independently of other users of the same `FileBacked`.
    fn new(fb: &FileBacked, start: u64) -> Self {
        let guard = fb.fd.lock().expect("poisoned lock");
        let fd = guard.try_clone().expect("resource exhaustion");

        Self {
            fd,
            buffer: [0u8; Self::PREDICTIVE_READ_BUFFER_SIZE],
            offset: start,
            len: 0,
            pos: 0,
        }
    }
}

impl Read for PredictiveReader {
    fn read(&mut self, buf: &mut [u8]) -> Result<usize, std::io::Error> {
        // Refill the internal buffer once it has been fully consumed.
        if self.pos == self.len {
            // Never read past the next buffer-size-aligned boundary, so a
            // refill stays within one "page" of the file.
            let page_remainder = Self::PREDICTIVE_READ_BUFFER_SIZE
                - (self.offset % Self::PREDICTIVE_READ_BUFFER_SIZE as u64) as usize;
            self.fd.seek(std::io::SeekFrom::Start(self.offset))?;
            let filled = self.fd.read(&mut self.buffer[..page_remainder])?;
            self.offset += filled as u64;
            self.len = filled;
            self.pos = 0;
        }

        // Hand out as much of the buffered data as the caller asked for;
        // returns Ok(0) at end-of-file (refill produced no bytes).
        let n = (self.len - self.pos).min(buf.len());
        buf[..n].copy_from_slice(&self.buffer[self.pos..self.pos + n]);
        self.pos += n;
        Ok(n)
    }
}

#[cfg(test)]
mod test {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    #[test]
    fn basic_reader_test() {
        // Write a payload far smaller than the 1K predictive buffer.
        let mut tmpfile = NamedTempFile::new().unwrap();
        let path = tmpfile.path().to_path_buf();
        write!(tmpfile.as_file_mut(), "hello world").unwrap();

        let fb = FileBacked::new(
            path,
            NonZero::new(10).unwrap(),
            NonZero::new(10).unwrap(),
            false,
        )
        .unwrap();

        // Whole payload in one go — fits entirely in a single refill.
        let mut reader = fb.stream_from(0).unwrap();
        let mut contents = String::new();
        assert_eq!(reader.read_to_string(&mut contents).unwrap(), 11);
        assert_eq!(contents, "hello world".to_string());
        assert_eq!(0, reader.read(&mut [0u8; 1]).unwrap());

        // One byte at a time — exercises buffered hand-out without refills.
        let mut reader = fb.stream_from(0).unwrap();
        for expected in b"hello world" {
            let mut one = [0u8; 1];
            assert_eq!(reader.read(&mut one).unwrap(), 1);
            assert_eq!(one[0], *expected);
        }
        assert_eq!(0, reader.read(&mut [0u8; 1]).unwrap());

        // Streaming from a non-zero start offset.
        let mut reader = fb.stream_from(6).unwrap();
        let mut tail = String::new();
        assert_eq!(reader.read_to_string(&mut tail).unwrap(), 5);
        assert_eq!(tail, "world".to_string());
    }

    #[test]
    fn big_file() {
        // 11_000 bytes forces many refills across 1K page boundaries.
        let mut tmpfile = NamedTempFile::new().unwrap();
        let path = tmpfile.path().to_path_buf();
        for _ in 0..1000 {
            write!(tmpfile.as_file_mut(), "hello world").unwrap();
        }

        let fb = FileBacked::new(
            path,
            NonZero::new(10).unwrap(),
            NonZero::new(10).unwrap(),
            false,
        )
        .unwrap();
        let mut reader = fb.stream_from(0).unwrap();
        let mut contents = String::new();
        assert_eq!(reader.read_to_string(&mut contents).unwrap(), 11000);
        assert_eq!(contents.len(), 11000);
    }
}
17 changes: 7 additions & 10 deletions storage/src/node/branch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use crate::{LeafNode, LinearAddress, Node, Path, TrieHash};
use std::fmt::{Debug, Error as FmtError, Formatter};

#[derive(PartialEq, Eq, Clone, Debug)]
#[repr(C)]
/// A child of a branch node.
pub enum Child {
/// There is a child at this index, but we haven't hashed it
Expand Down Expand Up @@ -42,7 +43,7 @@ impl Serialize for BranchNode {
state.serialize_field("partial_path", &self.partial_path)?;
state.serialize_field("value", &self.value)?;

let children: SmallVec<[(u8, LinearAddress, TrieHash); Self::MAX_CHILDREN]> = self
let children: SmallVec<[(u8, LinearAddress, &TrieHash); Self::MAX_CHILDREN]> = self
.children
.iter()
.enumerate()
Expand All @@ -51,9 +52,7 @@ impl Serialize for BranchNode {
Some(Child::Node(_)) => {
panic!("serializing in-memory node for disk storage")
}
Some(Child::AddressWithHash(addr, hash)) => {
Some((offset as u8, *addr, (*hash).clone()))
}
Some(Child::AddressWithHash(addr, hash)) => Some((offset as u8, *addr, hash)),
})
.collect();

Expand Down Expand Up @@ -92,18 +91,16 @@ impl<'de> Deserialize<'de> for BranchNode {

impl Debug for BranchNode {
fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), FmtError> {
write!(f, "[Branch")?;
write!(f, "[BranchNode")?;
write!(f, r#" path="{:?}""#, self.partial_path)?;

for (i, c) in self.children.iter().enumerate() {
match c {
None => {}
Some(Child::Node(_)) => {} //TODO
Some(Child::AddressWithHash(addr, hash)) => write!(
f,
"(index: {i:?}), address={addr:?}, hash={:?})",
hex::encode(hash),
)?,
Some(Child::AddressWithHash(addr, hash)) => {
write!(f, "({i:?}: address={addr:?} hash={})", hex::encode(hash),)?
}
}
}

Expand Down
Loading

0 comments on commit f91ab8a

Please sign in to comment.