diff --git a/libcusp/test/CMakeLists.txt b/libcusp/test/CMakeLists.txt index 710627302..7a26c8aaf 100644 --- a/libcusp/test/CMakeLists.txt +++ b/libcusp/test/CMakeLists.txt @@ -1,2 +1,2 @@ add_executable(shad_dist_graph shad-dist-graph.cpp) -target_link_libraries(shad_dist_graph galois_gnn) +target_link_libraries(shad_dist_graph galois_shmem galois_cusp) diff --git a/libgalois/include/galois/LargeVector.h b/libgalois/include/galois/LargeVector.h index dfdf80f84..bd735c249 100644 --- a/libgalois/include/galois/LargeVector.h +++ b/libgalois/include/galois/LargeVector.h @@ -24,6 +24,10 @@ namespace galois { * 2. All iterator methods (e.g. increment) preserve generation. * 3. It is undefined behavior to compare iterators across generations. * 4. Decreasing the container size invalidates some iterators. + * + * Note also that, like LargeArray, this class does not call constructors or + * destructors for elements. If you need to call constructors or destructors, + * you can use placement new and explicit destructor calls. */ template class LargeVector : public boost::noncopyable { @@ -71,18 +75,20 @@ class LargeVector : public boost::noncopyable { throw std::runtime_error(std::string("mmap failed: ") + std::strerror(errno)); + madvise(m_data, file_size, MADV_WILLNEED | MADV_HUGEPAGE); + m_mappings.push_front(std::make_pair(m_data, mmap_size)); } public: - LargeVector(size_t initial_capacity) + LargeVector(size_t initial_size) : m_capacity(0), m_size(0), m_data(nullptr), m_fd(memfd_create("LargeVector", 0)), m_mappings({std::make_pair(nullptr, 0)}) { if (m_fd == -1) throw std::runtime_error(std::string("creating memfd: ") + std::strerror(errno)); - ensure_capacity(initial_capacity); + resize(initial_size); } LargeVector() : LargeVector(1) {} @@ -98,20 +104,13 @@ class LargeVector : public boost::noncopyable { assert(other.m_mappings.empty()); } - LargeVector& operator=(LargeVector&& other) { - m_capacity = std::move(other.m_capacity); - m_size = std::move(other.m_size); - m_data = std::move(other.m_data); - m_fd = std::move(other.m_fd); - m_mappings = std::move(other.m_mappings); - - other.m_capacity = 0; - other.m_size = 0; - other.m_data = nullptr; - other.m_fd = -1; - assert(other.m_mappings.empty()); - - return *this; + friend void swap(LargeVector& first, LargeVector& second) { + using std::swap; + swap(first.m_capacity, second.m_capacity); + swap(first.m_size, second.m_size); + swap(first.m_data, second.m_data); + swap(first.m_fd, second.m_fd); + swap(first.m_mappings, second.m_mappings); } ~LargeVector() { @@ -125,63 +124,30 @@ class LargeVector : public boost::noncopyable { uint64_t size() const noexcept { return m_size; } - template - T& emplace_back(Args&&... args) { - if (m_size == m_capacity) { - ensure_capacity(m_size + 1); - } - return *new (m_data + m_size++) T(std::forward(args)...); - } - - T& push_back(const T& t) { return emplace_back(t); } - - T& push_back(T&& t) { return emplace_back(std::move(t)); } - T& operator[](size_t index) const { return m_data[index]; } - void pop_back() { - assert(m_size > 0); - m_data[--m_size].~T(); - } - + /** + * Note: unlike std::vector, resize does not call constructors or + * destructors. + */ void resize(size_t count) { - for (T* ii = begin() + count; ii < end(); ++ii) - ii->~T(); + // galois::do_all(galois::iterate(begin() + count, end()), + // [](T* ii) { ii->~T(); }); ensure_capacity(count); - for (T* ii = end(); ii < begin() + count; ++ii) - new (ii) T(); + // galois::do_all(galois::iterate(end(), begin() + count), + // [](T* ii) { new (ii) T(); }); m_size = count; } + bool empty() { return m_size == 0; } + inline T* begin() { return m_data; } inline T* end() { return m_data + m_size; } }; }; // namespace galois -namespace std { -template -ostream& operator<<(std::ostream& os, const galois::LargeVector& vec) { - for (uint64_t i = 0; i < vec.getSize(); i++) { - os << vec[i]; - if (i < vec.getSize() - 1) { - os << " "; - } - } - return os; -} - -template -istream& operator>>(istream& is, galois::LargeVector& vec) { - T value; - while (is >> value) { - vec.push_back(value); - } - return is; -} -} // namespace std - #endif diff --git a/libgalois/include/galois/graphs/LS_LC_CSR_Graph.h b/libgalois/include/galois/graphs/LS_LC_CSR_Graph.h index 7a12c439d..dcc871fed 100644 --- a/libgalois/include/galois/graphs/LS_LC_CSR_Graph.h +++ b/libgalois/include/galois/graphs/LS_LC_CSR_Graph.h @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -81,7 +82,7 @@ class LS_LC_CSR_Graph : private boost::noncopyable { using EdgeMetadata = VertexTopologyID; VertexDataStore m_vertex_data; - std::vector m_vertices; + LargeVector m_vertices; // m_edges[0] is the CSR with gaps, m_edges[1] is the update log. LargeVector m_edges[2]; @@ -89,13 +90,11 @@ class LS_LC_CSR_Graph : private boost::noncopyable { EdgeDataStore m_edge_data; alignas(hardware_destructive_interference_size) std::atomic_uint64_t m_edges_tail = ATOMIC_VAR_INIT(0); - // m_holes is the number of holes in the log (m_edges[1]) - alignas(hardware_destructive_interference_size) std::atomic_uint64_t m_holes = - ATOMIC_VAR_INIT(0); /* * Prefix Sum utilities */ + static constexpr uint64_t PARALLEL_PREFIX_SUM_VERTEX_THRESHOLD = 1ul << 25; std::vector m_pfx_sum_cache; static uint64_t transmute(const VertexMetadata& vertex_meta) { return vertex_meta.degree(); @@ -110,19 +109,32 @@ class LS_LC_CSR_Graph : private boost::noncopyable { CacheLinePaddedArr> m_pfx{&m_vertices[0], &m_pfx_sum_cache[0]}; + static uint64_t transmute_b0(VertexMetadata const& vertex_meta) { + return (vertex_meta.buffer) ? 0 : vertex_meta.degree(); + } + static uint64_t scan_op_b0(VertexMetadata const& p, const uint64_t& l) { + return (p.buffer) ? l : p.degree() + l; + } + PrefixSum + m_pfx_b0{&m_vertices[0], &m_pfx_sum_cache[0]}; + + // todo(meyer): there is currently a memory leak in prefix sum :/ + bool m_prefix_valid; void resetPrefixSum() { m_pfx_sum_cache.resize(m_vertices.size()); m_pfx.src = &m_vertices[0]; m_pfx.dst = &m_pfx_sum_cache[0]; + m_pfx_b0.src = &m_vertices[0]; + m_pfx_b0.dst = &m_pfx_sum_cache[0]; m_prefix_valid = false; } // Compute the prefix sum using the two level method void computePrefixSum() { - constexpr uint64_t PARALLEL_PREFIX_SUM_VERTEX_THRESHOLD = - static_cast(1) << 30; + resetPrefixSum(); if (m_vertices.size() > PARALLEL_PREFIX_SUM_VERTEX_THRESHOLD) { m_pfx.computePrefixSumSerially(m_vertices.size()); } else { @@ -137,8 +149,13 @@ class LS_LC_CSR_Graph : private boost::noncopyable { } public: - LS_LC_CSR_Graph(uint64_t num_vertices) - : m_vertices(num_vertices, VertexMetadata()) { + LS_LC_CSR_Graph(uint64_t num_vertices) : m_vertices(num_vertices) { + galois::do_all(galois::iterate(0ul, num_vertices), + [&](VertexTopologyID const& vertex) { + m_vertices[vertex].buffer = 0; + m_vertices[vertex].begin = 0; + m_vertices[vertex].end = 0; + }); if constexpr (HasVertexData) { m_vertex_data.resize(num_vertices); } @@ -178,7 +195,7 @@ class LS_LC_CSR_Graph : private boost::noncopyable { VertexRange vertices() { return VertexRange(begin(), end()); } VertexTopologyID addVertexTopologyOnly() { - m_vertices.emplace_back(); + m_vertices.resize(m_vertices.size() + 1); if constexpr (HasVertexData) { m_vertex_data.resize(m_vertices.size()); } @@ -193,6 +210,12 @@ class LS_LC_CSR_Graph : private boost::noncopyable { VertexTopologyID addVerticesTopologyOnly(size_t count) { VertexTopologyID const start = m_vertices.size(); m_vertices.resize(start + count); + galois::do_all(galois::iterate(start, start + count), + [&](VertexTopologyID const& vertex) { + m_vertices[vertex].buffer = 0; + m_vertices[vertex].begin = 0; + m_vertices[vertex].end = 0; + }); return start; } @@ -298,7 +321,7 @@ class LS_LC_CSR_Graph : private boost::noncopyable { template void addEdges(VertexTopologyID src, const std::vector dsts, std::vector data) { - GALOIS_ASSERT(data.size() == dsts.size()); + GALOIS_ASSERT(data.size() == dsts.size(), "Data size mismatch"); this->addEdgesTopologyOnly(src, dsts); for (size_t i = 0; i < dsts.size(); ++i) { auto key = std::make_pair(src, dsts[i]); @@ -306,6 +329,84 @@ class LS_LC_CSR_Graph : private boost::noncopyable { } } + template + void addBatchTopologyOnly( + std::vector>> + edges) { + if (edges.empty()) + return; + + std::vector pfx_sum(edges.size()); + galois::GReduceMax max_vertex_id; + max_vertex_id.reset(); + galois::GAccumulator old_degree_total; + old_degree_total.reset(); + galois::do_all( + galois::iterate(0ul, edges.size()), + [&](size_t idx) { + auto const vertex_id = edges[idx].first; + max_vertex_id.update(vertex_id); + auto const old_degree = + vertex_id < m_vertices.size() ? getDegree(vertex_id) : 0; + old_degree_total += old_degree; + pfx_sum[idx] = old_degree + edges[idx].second.size(); + }, + galois::loopname("ComputeVertexDegrees")); + + uint64_t const prev_num_vertices = m_vertices.size(); + m_vertices.resize(std::max(max_vertex_id.reduce() + 1, m_vertices.size())); + galois::do_all(galois::iterate(m_vertices.begin() + prev_num_vertices, + m_vertices.end()), + [&](VertexMetadata& vertex_meta) { + vertex_meta.buffer = 0; + vertex_meta.begin = 0; + vertex_meta.end = 0; + }); + + for (size_t i = 1; i < pfx_sum.size(); ++i) + pfx_sum[i] += pfx_sum[i - 1]; + auto const num_new_edges = pfx_sum.back(); + std::cout << "old degrees total: " << old_degree_total.reduce() + << ", new degrees total: " << num_new_edges << std::endl; + + auto const start = + m_edges_tail.fetch_add(num_new_edges, std::memory_order_relaxed); + if (m_edges[1].size() < start + num_new_edges) + m_edges[1].resize(start + num_new_edges); + + galois::do_all( + galois::iterate(0ul, edges.size()), + [&](size_t idx) { + auto const& [src, dsts] = edges[idx]; + auto const new_begin = (idx) ? (start + pfx_sum[idx - 1]) : start; + auto const new_end = start + pfx_sum[idx]; + auto& vertex_meta = m_vertices[src]; + EdgeMetadata* log_dst = &getEdgeMetadata(1, new_begin); + if constexpr (sorted) { + std::merge(dsts.begin(), dsts.end(), + &getEdgeMetadata(vertex_meta.buffer, vertex_meta.begin), + &getEdgeMetadata(vertex_meta.buffer, vertex_meta.end), + log_dst); + } else { + // copy old edges + log_dst = std::copy( + &getEdgeMetadata(vertex_meta.buffer, vertex_meta.begin), + &getEdgeMetadata(vertex_meta.buffer, vertex_meta.end), log_dst); + + // insert new edges + std::copy(dsts.begin(), dsts.end(), log_dst); + } + + // update vertex metadata + vertex_meta.buffer = 1; + vertex_meta.begin = new_begin; + vertex_meta.end = new_end; + }, + galois::steal(), galois::loopname("CopyEdgesToLog")); + + m_prefix_valid = false; + } + /* * Adds outgoing edges from the given src to all dsts. If `sorted`, assume * both `dsts` and the existing edge array is sorted ascending, and maintain @@ -317,10 +418,7 @@ class LS_LC_CSR_Graph : private boost::noncopyable { // Copies the edge list to the end of m_edges[1] together with the new // edges. - auto& vertex_meta = m_vertices[src]; - if (vertex_meta.buffer == 1) - m_holes.fetch_add(vertex_meta.degree(), std::memory_order_relaxed); - + auto& vertex_meta = m_vertices[src]; uint64_t const new_degree = vertex_meta.degree() + dsts.size(); uint64_t const new_begin = m_edges_tail.fetch_add(new_degree, std::memory_order_relaxed); @@ -330,7 +428,7 @@ class LS_LC_CSR_Graph : private boost::noncopyable { m_edges_lock.lock(); { if (m_edges[1].size() < new_end) - m_edges[1].resize(std::max(m_edges[1].size() * 2, new_end)); + m_edges[1].resize(new_end); } m_edges_lock.unlock(); } @@ -359,6 +457,7 @@ class LS_LC_CSR_Graph : private boost::noncopyable { m_prefix_valid = false; } +public: // Performs the compaction algorithm by copying any vertices left in buffer 0 // to buffer 1, then swapping the buffers. // @@ -366,67 +465,83 @@ class LS_LC_CSR_Graph : private boost::noncopyable { void compact() { using std::swap; - // move from buffer 0 to buffer 1 - galois::do_all( - galois::iterate(vertices().begin(), vertices().end()), - [&](VertexTopologyID vertex_id) { - VertexMetadata& vertex_meta = m_vertices[vertex_id]; + if (m_vertices.empty()) + return; + auto const num_vertices = m_vertices.size(); - if (vertex_meta.buffer == 0) { - this->addEdgesTopologyOnly(vertex_id, {}); - } - - // we are about to swap the buffers, so all vertices will - // be in buffer 0 - vertex_meta.buffer = 0; - }, - galois::steal()); + // step 1: copy from CSR to log + { + resetPrefixSum(); + if (m_vertices.size() > PARALLEL_PREFIX_SUM_VERTEX_THRESHOLD) { + m_pfx_b0.computePrefixSum(m_vertices.size()); + } else { + m_pfx_b0.computePrefixSumSerially(m_vertices.size()); + } + auto const log_num_edges = m_pfx_sum_cache.back(); + auto const start = + m_edges_tail.fetch_add(log_num_edges, std::memory_order_relaxed); + if (m_edges[1].size() < start + log_num_edges) { + m_edges[1].resize(start + log_num_edges); + } - // At this point, there are no more live edges in buffer 0. - m_edges_lock.lock(); + galois::do_all( + galois::iterate(0ul, num_vertices), + [&](VertexTopologyID ii) { + auto& vertex_meta = m_vertices[ii]; + if (vertex_meta.buffer) + return; // already on the log + auto const new_begin = start + ((ii) ? m_pfx_sum_cache[ii - 1] : 0); + auto const new_end = start + m_pfx_sum_cache[ii]; + std::copy(&getEdgeMetadata(0, vertex_meta.begin), + &getEdgeMetadata(0, vertex_meta.end), + &getEdgeMetadata(1, new_begin)); + // vertex_meta.buffer = 1; + vertex_meta.buffer = 0; // not accurate, but it soon will be... + vertex_meta.begin = new_begin; + vertex_meta.end = new_end; + }, + galois::steal(), galois::loopname("CopyCSRToLog")); + } + // At this point, all edges are on the log (and the prefix sum is invalid). + // We can now compact into the CSR. { - m_edges[0].resize(0); + // compute the actual prefix sum + auto const& prefix_sum = getEdgePrefixSum(); + m_edges[0].resize(prefix_sum.back()); + + galois::do_all( + galois::iterate(0ul, m_vertices.size()), + [&](size_t idx) { + auto& vertex_meta = m_vertices[idx]; + auto const new_begin = (idx) ? prefix_sum[idx - 1] : 0; + auto const new_end = prefix_sum[idx]; + std::copy(&getEdgeMetadata(1, vertex_meta.begin), + &getEdgeMetadata(1, vertex_meta.end), + &getEdgeMetadata(0, new_begin)); + // vertex_meta.buffer = 0; // already done above + vertex_meta.begin = new_begin; + vertex_meta.end = new_end; + }, + galois::steal(), galois::loopname("CompactLogToCSR")); + swap(m_edges[0], m_edges[1]); - // relaxed is fine because of locks held: - m_edges_tail.store(0, std::memory_order_relaxed); - m_holes.store(0, std::memory_order_relaxed); + // m_edges[1].resize(0); + m_edges_tail = 0; + m_prefix_valid = false; } - m_edges_lock.unlock(); } /* Compaction policy utilities. */ - // Returns an estimated memory usage in bytes for the entire data structure. - inline size_t getMemoryUsageBytes() { - size_t estimate = m_vertices.size() * sizeof(VertexMetadata); - if constexpr (HasVertexData) { - estimate += m_vertices.size() * sizeof(VertexData); - } - m_edges_lock.lock(); - { - estimate += - (m_edges[0].size() + m_edges_tail.load(std::memory_order_relaxed)) * - sizeof(EdgeMetadata); - } - m_edges_lock.unlock(); - if constexpr (HasEdgeData) { - estimate += m_edge_data.size() * - (sizeof(EdgeData) + - sizeof(std::pair)); - } - return estimate; + inline size_t getCSRMemoryUsageBytes() { + return m_edges[0].size() * sizeof(EdgeMetadata); } // Returns the number of bytes used for the log. inline size_t getLogMemoryUsageBytes() { - return m_edges_tail.load(std::memory_order_relaxed) * sizeof(EdgeMetadata); - } - - // Returns the number of bytes used for holes in the log. - inline size_t getLogHolesMemoryUsageBytes() { - return m_holes.load(std::memory_order_relaxed) * sizeof(EdgeMetadata); + return m_edges_tail * sizeof(EdgeMetadata); } /** @@ -455,7 +570,7 @@ class LS_LC_CSR_Graph : private boost::noncopyable { uint64_t end; // exclusive uint8_t buffer; - VertexMetadata() : begin(0), end(0), buffer(0) {} + VertexMetadata() = delete; VertexMetadata(VertexMetadata const& other) : begin(other.begin), end(other.end), buffer(other.buffer) {} @@ -498,6 +613,6 @@ class LS_LC_CSR_Graph : private boost::noncopyable { }; }; -}; // namespace galois::graphs +} // namespace galois::graphs #endif diff --git a/libgalois/include/galois/graphs/MorphGraph.h b/libgalois/include/galois/graphs/MorphGraph.h index ef29d1c0a..7d11ed9e5 100644 --- a/libgalois/include/galois/graphs/MorphGraph.h +++ b/libgalois/include/galois/graphs/MorphGraph.h @@ -977,7 +977,7 @@ public //! Sorts edge of a node by destination. void sortEdgesByDst(GraphNode N, galois::MethodFlag mflag = MethodFlag::WRITE) { - acquire(N, mflag); + N->acquire(mflag); typedef typename gNode::EdgeInfo EdgeInfo; std::sort(N->begin(), N->end(), [=](const EdgeInfo& e1, const EdgeInfo& e2) { diff --git a/libgalois/test/graph-compile-lscsr.cpp b/libgalois/test/graph-compile-lscsr.cpp index a239958d3..8924664c3 100644 --- a/libgalois/test/graph-compile-lscsr.cpp +++ b/libgalois/test/graph-compile-lscsr.cpp @@ -70,15 +70,27 @@ int main() { g.setData(3, 3); GALOIS_ASSERT(g.getData(3) == 3); - uint64_t four = g.addVertices({4, 5, 6, 7}); + auto const four = g.addVertices({4, 5, 6, 7}); + std::vector>> new_edges = { + {four, {0, 1, 2, 3}}, + {four + 1, {0, 1, 2, 3}}, + {four + 2, {0, 1, 2, 3}}, + {four + 3, {0, 1, 2, 3}}}; + g.addBatchTopologyOnly(std::move(new_edges)); for (size_t ii = 0; ii < 4; ++ii) { // make sure previous data survived the resize GALOIS_ASSERT(g.getData(ii) == ii); - // check the new vertex data + // make sure new data is correct GALOIS_ASSERT(g.getData(four + ii) == 4 + ii); } + for (uint64_t ii = 0; ii < 4; ++ii) { + for (uint64_t jj = 0; jj < 4; ++jj) { + GALOIS_ASSERT(g.getEdgeDst(g.edge_begin(four + ii)[jj]) == jj); + } + } + g.addEdges(0, {1, 2, 3}, {1, 2, 3}); for (auto const& handle : g.edges(0)) { GALOIS_ASSERT(g.getEdgeDst(handle) == g.getEdgeData(handle)); @@ -124,7 +136,7 @@ int main() { GALOIS_ASSERT(g[1] == 3); GALOIS_ASSERT(g[2] == 3); // ... - GALOIS_ASSERT(g[8] == 7); + GALOIS_ASSERT(g[four + 4] == 23); uint64_t num_vertices = (1 << 22) + 67; galois::graphs::LS_LC_CSR_Graph big(num_vertices); diff --git a/libgalois/test/large-vector.cpp b/libgalois/test/large-vector.cpp index 313626041..0cd706dec 100644 --- a/libgalois/test/large-vector.cpp +++ b/libgalois/test/large-vector.cpp @@ -26,34 +26,32 @@ int main() { galois::SharedMemSys Galois_runtime; { - galois::LargeVector the_vector; + galois::LargeVector the_vector(1 << 21); // should use 4 hugepages - std::vector refs; - for (size_t i = 0; i < (1 << 21); ++i) { - refs.emplace_back(&the_vector.emplace_back(i)); + std::vector refs(the_vector.size()); + for (size_t i = 0; i < the_vector.size(); ++i) { + the_vector[i] = i; + refs[i] = &the_vector[i]; } - for (size_t i = 0; i < (1 << 21); ++i) { + for (size_t i = 0; i < the_vector.size(); ++i) { GALOIS_ASSERT(*refs[i] == i); } } { - static uint64_t num_constructed = 0, num_destructed = 0; class Object { uint8_t dummy; public: - Object() { ++num_constructed; } - ~Object() { ++num_destructed; } + Object() = delete; + ~Object() = delete; }; static_assert(sizeof(Object) > 0); const size_t max_cap = (1 << 22); galois::LargeVector the_vector(max_cap); - // constructor should not actually fill the vector - GALOIS_ASSERT(num_constructed == 0); // entire vector should be mapped, even if it is empty const Object* addr = &the_vector[max_cap]; @@ -61,16 +59,12 @@ int main() { the_vector.resize(max_cap); - GALOIS_ASSERT(num_constructed == max_cap); GALOIS_ASSERT(addr == &the_vector[max_cap]); - // resize should call the destructor, but vector should stay mapped - GALOIS_ASSERT(num_destructed == 0); the_vector.resize(0); - GALOIS_ASSERT(num_destructed == max_cap); GALOIS_ASSERT(addr == &the_vector[max_cap]); - // this should not actually allocate memory! + // this should only take 1 hugepage! galois::LargeVector huge(1ul << 40); huge[0] = 0; }