Skip to content

Commit

Permalink
paged vecvec (#209)
Browse files Browse the repository at this point in the history
  • Loading branch information
felixguendling authored Mar 19, 2024
1 parent 3f8c440 commit ea98b07
Show file tree
Hide file tree
Showing 10 changed files with 642 additions and 19 deletions.
13 changes: 13 additions & 0 deletions include/cista/bit_counting.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,19 @@

namespace cista {

// Portable, constexpr-friendly count of trailing zero bits.
//
// Scans `t` from the least significant bit upwards and returns the position
// of the first bit that is set. If no bit is set (t == 0), the result is 0,
// which is indistinguishable from "bit 0 is set" - callers that need to tell
// the two cases apart have to check for zero themselves.
template <typename T>
inline constexpr unsigned constexpr_trailing_zeros(T t) {
  constexpr auto num_bits = static_cast<unsigned>(sizeof(T) * 8U);
  auto pos = 0U;
  while (pos != num_bits) {
    if (((t >> pos) & T{1U}) == T{1U}) {
      return pos;
    }
    ++pos;
  }
  return 0U;
}

template <typename T>
constexpr unsigned trailing_zeros(T t) noexcept {
static_assert(sizeof(T) == 8U || sizeof(T) == 4U, "not supported");
Expand Down
2 changes: 2 additions & 0 deletions include/cista/containers.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
#include "cista/containers/mutable_fws_multimap.h"
#include "cista/containers/nvec.h"
#include "cista/containers/optional.h"
#include "cista/containers/paged.h"
#include "cista/containers/paged_vecvec.h"
#include "cista/containers/string.h"
#include "cista/containers/tuple.h"
#include "cista/containers/unique_ptr.h"
Expand Down
34 changes: 27 additions & 7 deletions include/cista/containers/bitvec.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ struct basic_bitvec {
using block_t = typename Vec::value_type;
using size_type = typename Vec::size_type;
static constexpr auto const bits_per_block =
static_cast<unsigned>(sizeof(block_t) * 8);
static_cast<size_type>(sizeof(block_t) * 8);

constexpr basic_bitvec() noexcept {}
constexpr basic_bitvec(std::string_view s) noexcept { set(s); }
Expand All @@ -35,9 +35,9 @@ struct basic_bitvec {

auto cista_members() noexcept { return std::tie(blocks_); }

static constexpr unsigned num_blocks(std::size_t num_bits) {
return static_cast<unsigned>(num_bits / bits_per_block +
(num_bits % bits_per_block == 0 ? 0 : 1));
static constexpr size_type num_blocks(size_type num_bits) {
return static_cast<size_type>(num_bits / bits_per_block +
(num_bits % bits_per_block == 0 ? 0 : 1));
}

void resize(size_type const new_size) {
Expand Down Expand Up @@ -65,7 +65,7 @@ struct basic_bitvec {
constexpr void set(size_type const i, bool const val = true) noexcept {
assert(i < size_);
assert((i / bits_per_block) < blocks_.size());
auto& block = blocks_[static_cast<unsigned>(i) / bits_per_block];
auto& block = blocks_[static_cast<size_type>(i) / bits_per_block];
auto const bit = i % bits_per_block;
if (val) {
block |= (block_t{1U} << bit);
Expand All @@ -80,7 +80,7 @@ struct basic_bitvec {

std::size_t count() const noexcept {
auto sum = std::size_t{0U};
for (auto i = std::size_t{0U}; i != blocks_.size() - 1; ++i) {
for (auto i = size_type{0U}; i != blocks_.size() - 1; ++i) {
sum += popcount(blocks_[i]);
}
return sum + popcount(sanitized_last_block());
Expand All @@ -91,11 +91,31 @@ struct basic_bitvec {
return false;
}
assert((i / bits_per_block) < blocks_.size());
auto const block = blocks_[static_cast<unsigned>(i) / bits_per_block];
auto const block = blocks_[static_cast<size_type>(i) / bits_per_block];
auto const bit = (i % bits_per_block);
return (block & (block_t{1U} << bit)) != 0U;
}

template <typename Fn>
void for_each_set_bit(Fn&& f) const {
if (empty()) {
return;
}
auto const check_block = [&](size_type const i, block_t const block) {
if (block != 0U) {
for (auto bit = size_type{0U}; bit != bits_per_block; ++bit) {
if ((block & (block_t{1U} << bit)) != 0U) {
f(i * bits_per_block + bit);
}
}
}
};
for (auto i = size_type{0U}; i != blocks_.size() - 1; ++i) {
check_block(i, blocks_[i]);
}
check_block(blocks_.size() - 1, sanitized_last_block());
}

size_type size() const noexcept { return size_; }
bool empty() const noexcept { return size() == 0U; }

Expand Down
24 changes: 17 additions & 7 deletions include/cista/containers/mmap_vec.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

namespace cista {

template <typename T, typename Key = std::size_t>
template <typename T, typename Key = std::uint32_t>
struct basic_mmap_vec {
using size_type = base_t<Key>;
using difference_type = std::ptrdiff_t;
Expand All @@ -23,7 +23,8 @@ struct basic_mmap_vec {
static_assert(std::is_trivially_copyable_v<T>);

explicit basic_mmap_vec(cista::mmap mmap)
: mmap_{std::move(mmap)}, used_size_{mmap_.size() / sizeof(T)} {}
: mmap_{std::move(mmap)},
used_size_{static_cast<size_type>(mmap_.size() / sizeof(T))} {}

void push_back(T const& t) {
++used_size_;
Expand All @@ -40,7 +41,7 @@ struct basic_mmap_vec {
return *ptr;
}

std::size_t size() const { return used_size_; }
size_type size() const { return used_size_; }

T const* data() const noexcept { return begin(); }
T* data() noexcept { return begin(); }
Expand All @@ -53,6 +54,12 @@ struct basic_mmap_vec {
T* begin() noexcept { return reinterpret_cast<T*>(mmap_.data()); }
T* end() noexcept { return begin() + used_size_; } // NOLINT

// Non-member begin()/end() overloads (hidden friends) that forward to the
// member functions, so unqualified begin(v) / end(v) calls work as well.
friend T const* begin(basic_mmap_vec const& v) noexcept {
  return v.begin();
}
friend T const* end(basic_mmap_vec const& v) noexcept {
  return v.end();
}

friend T* begin(basic_mmap_vec& v) noexcept {
  return v.begin();
}
friend T* end(basic_mmap_vec& v) noexcept {
  return v.end();
}

bool empty() const noexcept { return size() == 0U; }

T const& operator[](access_type const index) const noexcept {
Expand All @@ -65,11 +72,14 @@ struct basic_mmap_vec {
return begin()[to_idx(index)];
}

void reserve(std::size_t const size) { mmap_.resize(size * sizeof(T)); }
void reserve(size_type const size) { mmap_.resize(size * sizeof(T)); }

void resize(std::size_t const size) {
void resize(size_type const size) {
mmap_.resize(size * sizeof(T));
for (auto i = used_size_; i < size; ++i) {
new (data() + i) T{};
}
used_size_ = size;
mmap_.resize(size);
}

template <typename It>
Expand Down Expand Up @@ -161,7 +171,7 @@ struct basic_mmap_vec {
}

cista::mmap mmap_;
std::size_t used_size_{0U};
size_type used_size_{0U};
};

template <typename T>
Expand Down
8 changes: 4 additions & 4 deletions include/cista/containers/mutable_fws_multimap.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ struct dynamic_fws_multimap_base {
size_type size_{};
size_type capacity_{};
};
using IndexVec = Vec<index_type>;
using index_vec_t = Vec<index_type>;

template <bool Const>
struct bucket {
Expand Down Expand Up @@ -557,7 +557,7 @@ struct dynamic_fws_multimap_base {
std::optional<index_type> get_free_bucket(size_type const requested_order) {
assert(requested_order <= Log2MaxEntriesPerBucket);

auto const pop = [](IndexVec& vec) -> std::optional<index_type> {
auto const pop = [](index_vec_t& vec) -> std::optional<index_type> {
if (!vec.empty()) {
auto it = std::prev(vec.end());
auto const entry = *it;
Expand Down Expand Up @@ -619,9 +619,9 @@ struct dynamic_fws_multimap_base {
return size_type{cista::trailing_zeros(to_idx(size))};
}

IndexVec index_;
index_vec_t index_;
data_vec_t data_;
array<IndexVec, Log2MaxEntriesPerBucket + 1U> free_buckets_;
array<index_vec_t, Log2MaxEntriesPerBucket + 1U> free_buckets_;
size_type element_count_{};
};

Expand Down
138 changes: 138 additions & 0 deletions include/cista/containers/paged.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
#pragma once

#include <cinttypes>
#include <cstring>
#include <limits>

#include "cista/bit_counting.h"
#include "cista/containers/array.h"
#include "cista/next_power_of_2.h"
#include "cista/verify.h"

namespace cista {

// Descriptor of a single page handed out by `paged`.
//
// The member order (size, capacity, start) must stay as it is: pages are
// created via aggregate initialization `{size, capacity, start}`.
template <typename SizeType>
struct page {
  // Number of elements currently in use.
  SizeType size_{0U};
  // Number of elements reserved for this page; zero marks an invalid page.
  SizeType capacity_{0U};
  // Index of the first element of this page in the backing data vector.
  SizeType start_{0U};

  // Returns the number of elements currently in use.
  SizeType size() const noexcept { return size_; }

  // A page is valid as soon as it owns storage (non-zero capacity).
  bool valid() const { return capacity_ != 0U; }
};

// Page allocator on top of one contiguous data vector.
//
// Pages have power-of-two capacities between MinPageSize and MaxPageSize
// (both counted in elements). Freed pages are kept in one free list per
// capacity class and are reused by create_page() before the data vector is
// grown. The free lists are intrusive: the link to the next free page is
// stored in the first bytes of the freed page itself.
//
// Template parameters:
//   DataVec     - backing vector type, value_type must be trivially copyable
//   SizeType    - type used for sizes and offsets into the data vector
//   MinPageSize - smallest page capacity (assumed to be a power of two)
//   MaxPageSize - largest page capacity (assumed to be a power of two)
template <typename DataVec, typename SizeType = typename DataVec::size_type,
          SizeType MinPageSize = next_power_of_two(3U * sizeof(SizeType)),
          SizeType MaxPageSize = 65536U>
struct paged {
  using value_type = typename DataVec::value_type;
  using iterator = typename DataVec::iterator;
  using const_iterator = typename DataVec::const_iterator;
  using reference = typename DataVec::reference;
  using const_reference = typename DataVec::const_reference;
  using size_type = SizeType;
  using page_t = page<SizeType>;

  // The smallest page must span at least sizeof(page_t) bytes; freed pages
  // are reused as storage for the free list link (see node::push).
  static_assert(sizeof(value_type) * MinPageSize >= sizeof(page_t));
  // Pages are moved around with memcpy.
  static_assert(std::is_trivially_copyable_v<value_type>);

  // Maps a page capacity to the index of its free list:
  // log2(capacity) - log2(MinPageSize), given both are powers of two.
  static constexpr size_type free_list_index(size_type const capacity) {
    return static_cast<size_type>(constexpr_trailing_zeros(capacity) -
                                  constexpr_trailing_zeros(MinPageSize));
  }

  // One free list per capacity class from MinPageSize up to MaxPageSize.
  static constexpr size_type free_list_size = free_list_index(MaxPageSize) + 1U;

  // Returns a page descriptor with `size` elements in use.
  // If `size` fits into the capacity of `p`, the storage is reused. Otherwise
  // a new page is created, the contents of `p` are copied over and `p` is
  // released to its free list.
  page_t resize_page(page_t const& p, size_type const size) {
    if (size <= p.capacity_) {
      return {size, p.capacity_, p.start_};
    } else {
      auto const new_page = create_page(size);
      copy(new_page, p);
      free_page(p);
      return new_page;
    }
  }

  // Creates a page that can hold at least `size` elements. The capacity is
  // rounded up to a power of two (at least MinPageSize). A previously freed
  // page of that capacity is reused if available, otherwise the data vector
  // is extended. Fails verification if the capacity exceeds MaxPageSize.
  page_t create_page(size_type const size) {
    auto const capacity = next_power_of_two(std::max(MinPageSize, size));
    auto const i = free_list_index(capacity);
    verify(i < free_list_.size(), "paged::create_page: size > max capacity");
    if (!free_list_[i].empty()) {
      auto start = free_list_[i].pop(*this);
      return {size, capacity, start};
    } else {
      auto const start = data_.size();
      data_.resize(data_.size() + capacity);
      return {size, capacity, start};
    }
  }

  // Puts the storage of `p` onto the free list of its capacity class.
  // Invalid pages (capacity zero) own no storage and are ignored.
  void free_page(page_t const& p) {
    if (!p.valid()) {
      return;
    }
    auto const i = free_list_index(p.capacity_);
    verify(i < free_list_.size(), "paged::free_page: size > max capacity");
    free_list_[i].push(*this, p.start_);
  }

  // Reads a trivially copyable T from the bytes starting at element `offset`
  // of the data vector.
  template <typename T>
  T read(size_type const offset) {
    static_assert(std::is_trivially_copyable_v<T>);
    auto x = T{};
    std::memcpy(&x, &data_[offset], sizeof(x));
    return x;
  }

  // Writes a trivially copyable T to the bytes starting at element `offset`
  // of the data vector.
  template <typename T>
  void write(size_type const offset, T const& x) {
    static_assert(std::is_trivially_copyable_v<T>);
    std::memcpy(&data_[offset], &x, sizeof(T));
  }

  // Pointer to the first element of page `p`.
  value_type* data(page_t const& p) { return &data_[p.start_]; }
  value_type const* data(page_t const& p) const { return &data_[p.start_]; }

  value_type* begin(page_t const& p) { return data(p); }
  value_type const* begin(page_t const& p) const { return data(p); }

  // Pointer one past the last used element of page `p`.
  // Fixed: the mutable overload was declared const (and therefore could not
  // return a mutable pointer), the const overload called a non-existing
  // begin() and used `p.size` without calling it.
  value_type* end(page_t const& p) { return begin(p) + p.size(); }
  value_type const* end(page_t const& p) const { return begin(p) + p.size(); }

  // Copies the used elements of page `from` to the start of page `to`.
  // `to` must have capacity for at least from.size() elements.
  void copy(page_t const& to, page_t const& from) {
    std::memcpy(data(to), data(from), from.size() * sizeof(value_type));
  }

  // Copies the elements of the contiguous range [begin, end) to the start of
  // page `to`. The range must be stored contiguously and `to` must have
  // capacity for all of its elements.
  template <typename ItA, typename ItB>
  void copy(page_t const& to, ItA begin, ItB end) {
    auto const n = static_cast<std::size_t>(std::distance(begin, end));
    if (n == 0U) {
      return;  // empty range: `begin` must not be dereferenced
    }
    // memcpy counts bytes, not elements.
    std::memcpy(data(to), &*begin, n * sizeof(value_type));
  }

  // Head of an intrusive singly linked list of free pages of one capacity
  // class. `next_` is the start offset of the first free page; the offset of
  // the following free page is stored inside that page.
  struct node {
    // The maximum value of size_type marks the end of the list.
    bool empty() const {
      return next_ == std::numeric_limits<size_type>::max();
    }

    // Prepends the page starting at `start`: the current head is stored in
    // the freed page, which then becomes the new head.
    void push(paged& m, size_type const start) {
      m.write(start, next_);
      next_ = start;
    }

    // Removes the first free page from the list and returns its start
    // offset. Fails verification if the list is empty.
    size_type pop(paged& m) {
      verify(!empty(), "paged: invalid read access to empty free list entry");
      auto const next_start = m.read<size_type>(next_);
      auto start = next_;
      next_ = next_start;
      return start;
    }

    size_type next_{std::numeric_limits<size_type>::max()};
  };

  DataVec data_;
  array<node, free_list_size> free_list_{};
};

} // namespace cista
Loading

0 comments on commit ea98b07

Please sign in to comment.