Commit ec4c80a

Merge branch 'main' into garth/docker-bump

garth-wells authored Jan 11, 2025
2 parents e02d141 + 7dee60c commit ec4c80a

Showing 36 changed files with 824 additions and 518 deletions.
6 changes: 4 additions & 2 deletions .github/workflows/ccpp.yml
@@ -78,7 +78,7 @@ jobs:
find . -type f \( -name "*.cmake" -o -name "*.cmake.in" -o -name "CMakeLists.txt" \) | xargs cmake-format --check
build:
-runs-on: ubuntu-latest
+runs-on: ubuntu-24.04
needs: [fenicsx-refs, lint]
env:
OMPI_ALLOW_RUN_AS_ROOT: 1
@@ -93,7 +93,9 @@ jobs:
- name: Install dependencies
run: |
sudo apt-get update
-sudo apt-get install catch2 cmake g++ libboost-dev libhdf5-mpi-dev libparmetis-dev libpugixml-dev libspdlog-dev mpi-default-dev ninja-build pkg-config
+sudo apt-get install catch2 cmake g++ libblas-dev libboost-dev libhdf5-mpi-dev \
+  liblapack-dev libparmetis-dev libpugixml-dev libspdlog-dev mpi-default-dev \
+  ninja-build pkg-config
- name: Set up Python
uses: actions/setup-python@v5
with:
2 changes: 1 addition & 1 deletion .github/workflows/macos.yml
@@ -20,7 +20,7 @@ jobs:

mac-os-build:
name: macOS Homebrew install and test
-runs-on: macos-14
+runs-on: macos-15
needs: fenicsx-refs
env:
PETSC_ARCH: arch-darwin-c-opt
2 changes: 2 additions & 0 deletions cpp/dolfinx/fem/assemble_vector_impl.h
@@ -1295,8 +1295,10 @@ void assemble_vector(
std::shared_ptr<const mesh::Mesh<U>> mesh = L.mesh();
assert(mesh);
if constexpr (std::is_same_v<U, scalar_value_type_t<T>>)
+{
assemble_vector(b, L, mesh->geometry().dofmap(), mesh->geometry().x(),
constants, coefficients);
+}
else
{
auto x = mesh->geometry().x();
18 changes: 9 additions & 9 deletions cpp/dolfinx/fem/discreteoperators.h
@@ -28,12 +28,12 @@ namespace dolfinx::fem
/// a Lagrange finite element function in \f$V_0 \subset H^1\f$ into a
/// Nédélec (first kind) space \f$V_1 \subset H({\rm curl})\f$, i.e.
/// \f$\nabla V_0 \rightarrow V_1\f$. If \f$u_0\f$ is the
-/// degree-of-freedom vector associated with \f$V_0\f$, the hen
+/// degree-of-freedom vector associated with \f$V_0\f$, then
/// \f$u_1=Au_0\f$ where \f$u_1\f$ is the degrees-of-freedom vector for
/// interpolating function in the \f$H({\rm curl})\f$ space. An example
/// of where discrete gradient operators are used is the creation of
-/// algebraic multigrid solvers for \f$H({\rm curl})\f$ and
-/// \f$H({\rm div})\f$ problems.
+/// algebraic multigrid solvers for \f$H({\rm curl})\f$ and \f$H({\rm
+/// div})\f$ problems.
///
/// @note The sparsity pattern for a discrete operator can be
/// initialised using sparsitybuild::cells. The space `V1` should be
@@ -44,9 +44,9 @@ namespace dolfinx::fem
///
/// @param[in] topology Mesh topology
/// @param[in] V0 Lagrange element and dofmap for corresponding space to
-/// interpolate the gradient from
-/// @param[in] V1 Nédélec (first kind) element and and dofmap for
-/// corresponding space to interpolate into
+/// interpolate the gradient from.
+/// @param[in] V1 Nédélec (first kind) element and dofmap for
+/// corresponding space to interpolate into.
/// @param[in] mat_set A functor that sets values in a matrix
template <dolfinx::scalar T,
std::floating_point U = dolfinx::scalar_value_type_t<T>>
@@ -148,9 +148,9 @@ void discrete_gradient(mesh::Topology& topology,
/// initialised using sparsitybuild::cells. The space `V1` should be
/// used for the rows of the sparsity pattern, `V0` for the columns.
///
-/// @param[in] V0 The space to interpolate from
-/// @param[in] V1 The space to interpolate to
-/// @param[in] mat_set A functor that sets values in a matrix
+/// @param[in] V0 Space to interpolate from.
+/// @param[in] V1 Space to interpolate to.
+/// @param[in] mat_set Functor that sets values in a matrix.
template <dolfinx::scalar T, std::floating_point U>
void interpolation_matrix(const FunctionSpace<U>& V0,
const FunctionSpace<U>& V1, auto&& mat_set)
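For background (not part of this commit), a minimal sketch of how the
`interpolation_matrix` interface documented above can be driven. It assumes
`V0` and `V1` are existing `FunctionSpace<double>` objects, that `sp` is a
`la::SparsityPattern` built as the note above prescribes (rows from `V1`,
columns from `V0`), and that `mat_set_values()` is the insertion functor
exposed by `la::MatrixCSR`:

// Sketch only; V0, V1 and sp are assumed to exist as described above.
la::MatrixCSR<double> A(sp);
fem::interpolation_matrix<double, double>(V0, V1, A.mat_set_values());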
83 changes: 83 additions & 0 deletions cpp/dolfinx/la/MatrixCSR.h
@@ -7,6 +7,7 @@
#pragma once

#include "SparsityPattern.h"
#include "Vector.h"
#include "matrix_csr_impl.h"
#include <algorithm>
#include <dolfinx/common/IndexMap.h>
@@ -322,6 +323,15 @@ class MatrixCSR
/// @note MPI Collective
double squared_norm() const;

+/// @brief Compute the product `y += Ax`.
+///
+/// The vectors `x` and `y` must have parallel layouts that are
+/// compatible with `A`.
+///
+/// @param[in] x Vector to apply `A` to.
+/// @param[in,out] y Vector to accumulate the result into.
+void mult(Vector<value_type>& x, Vector<value_type>& y);

/// @brief Index maps for the row and column space.
///
/// The row IndexMap contains ghost entries for rows which may be
@@ -734,4 +744,77 @@ double MatrixCSR<U, V, W, X>::squared_norm() const
}
//-----------------------------------------------------------------------------

+// The matrix A is distributed across P processes by blocks of rows:
+//  A = | A_0   |
+//      | A_1   |
+//      | ...   |
+//      | A_P-1 |
+//
+// Each submatrix A_i is owned by a single process "i" and can be further
+// decomposed into diagonal (Ai[0]) and off-diagonal (Ai[1]) blocks:
+//  Ai = |Ai[0] Ai[1]|
+//
+// If A is square, the diagonal block Ai[0] is also square and contains
+// only owned columns and rows. The block Ai[1] contains ghost columns
+// (unowned dofs).
+
+// Likewise, a local vector x can be decomposed into owned and ghost blocks:
+//  xi = | x[0] |
+//       | x[1] |
+//
+// So the product y = Ax can be computed in two separate steps:
+//  y[0] = |Ai[0] Ai[1]| | x[0] | = Ai[0] x[0] + Ai[1] x[1]
+//                       | x[1] |
+//
+/// Computes y += A*x for a parallel CSR matrix A and parallel dense vectors x,y
+template <typename Scalar, typename V, typename W, typename X>
+void MatrixCSR<Scalar, V, W, X>::mult(la::Vector<Scalar>& x,
+                                      la::Vector<Scalar>& y)
+{
+  // start communication (update ghosts)
+  x.scatter_fwd_begin();
+
+  const std::int32_t nrowslocal = num_owned_rows();
+  std::span<const std::int64_t> Arow_ptr(row_ptr().data(), nrowslocal + 1);
+  std::span<const std::int32_t> Acols(cols().data(), Arow_ptr[nrowslocal]);
+  std::span<const std::int64_t> Aoff_diag_offset(off_diag_offset().data(),
+                                                 nrowslocal);
+  std::span<const Scalar> Avalues(values().data(), Arow_ptr[nrowslocal]);
+
+  std::span<const Scalar> _x = x.array();
+  std::span<Scalar> _y = y.mutable_array();
+
+  std::span<const std::int64_t> Arow_begin(Arow_ptr.data(), nrowslocal);
+  std::span<const std::int64_t> Arow_end(Arow_ptr.data() + 1, nrowslocal);
+
+  // First stage: spmv - diagonal
+  // yi[0] += Ai[0] * xi[0]
+  if (_bs[1] == 1)
+  {
+    impl::spmv<Scalar, 1>(Avalues, Arow_begin, Aoff_diag_offset, Acols, _x, _y,
+                          _bs[0], 1);
+  }
+  else
+  {
+    impl::spmv<Scalar, -1>(Avalues, Arow_begin, Aoff_diag_offset, Acols, _x,
+                           _y, _bs[0], _bs[1]);
+  }
+
+  // finalize ghost update
+  x.scatter_fwd_end();
+
+  // Second stage: spmv - off-diagonal
+  // yi[0] += Ai[1] * xi[1]
+  if (_bs[1] == 1)
+  {
+    impl::spmv<Scalar, 1>(Avalues, Aoff_diag_offset, Arow_end, Acols, _x, _y,
+                          _bs[0], 1);
+  }
+  else
+  {
+    impl::spmv<Scalar, -1>(Avalues, Aoff_diag_offset, Arow_end, Acols, _x, _y,
+                           _bs[0], _bs[1]);
+  }
+}
} // namespace dolfinx::la
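A short usage sketch for the new member (not part of this commit; it mirrors
the updated unit test further down). Here `A` is assumed to be an assembled,
square `la::MatrixCSR<double>`, and `index_map`/`block_size` are assumed to
be the accessors documented elsewhere in this class:

// Sketch: y += A*x with layouts taken from the matrix itself.
la::Vector<double> x(A.index_map(1), A.block_size()[1]);
la::Vector<double> y(A.index_map(0), A.block_size()[0]);
x.set(1.0); // x = (1, 1, ..., 1)
y.set(0.0);
A.mult(x, y); // y now holds the row sums of A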
64 changes: 50 additions & 14 deletions cpp/dolfinx/la/matrix_csr_impl.h
@@ -100,14 +100,12 @@ void insert_nonblocked_csr(
const X& x, const Y& xrows, const Y& xcols, OP op,
typename Y::value_type num_rows, int bs0, int bs1);

-} // namespace impl

//-----------------------------------------------------------------------------
template <int BS0, int BS1, typename OP, typename U, typename V, typename W,
typename X, typename Y>
-void impl::insert_csr(U&& data, const V& cols, const W& row_ptr, const X& x,
-                      const Y& xrows, const Y& xcols, OP op,
-                      [[maybe_unused]] typename Y::value_type num_rows)
+void insert_csr(U&& data, const V& cols, const W& row_ptr, const X& x,
+                const Y& xrows, const Y& xcols, OP op,
+                [[maybe_unused]] typename Y::value_type num_rows)
{
const std::size_t nc = xcols.size();
assert(x.size() == xrows.size() * xcols.size() * BS0 * BS1);
@@ -151,9 +149,9 @@ void impl::insert_csr(U&& data, const V& cols, const W& row_ptr, const X& x,
// Insert with block insertion into a regular CSR (block size 1)
template <int BS0, int BS1, typename OP, typename U, typename V, typename W,
typename X, typename Y>
-void impl::insert_blocked_csr(U&& data, const V& cols, const W& row_ptr,
-                              const X& x, const Y& xrows, const Y& xcols, OP op,
-                              [[maybe_unused]] typename Y::value_type num_rows)
+void insert_blocked_csr(U&& data, const V& cols, const W& row_ptr, const X& x,
+                        const Y& xrows, const Y& xcols, OP op,
+                        [[maybe_unused]] typename Y::value_type num_rows)
{
const std::size_t nc = xcols.size();
assert(x.size() == xrows.size() * xcols.size() * BS0 * BS1);
@@ -195,12 +193,10 @@ void impl::insert_blocked_csr(U&& data, const V& cols, const W& row_ptr,
// Add individual entries in block-CSR storage
template <typename OP, typename U, typename V, typename W, typename X,
typename Y>
-void impl::insert_nonblocked_csr(U&& data, const V& cols, const W& row_ptr,
-                                 const X& x, const Y& xrows, const Y& xcols,
-                                 OP op,
-                                 [[maybe_unused]]
-                                 typename Y::value_type num_rows,
-                                 int bs0, int bs1)
+void insert_nonblocked_csr(U&& data, const V& cols, const W& row_ptr,
+                           const X& x, const Y& xrows, const Y& xcols, OP op,
+                           [[maybe_unused]] typename Y::value_type num_rows,
+                           int bs0, int bs1)
{
const std::size_t nc = xcols.size();
const int nbs = bs0 * bs1;
@@ -237,4 +233,44 @@ void impl::insert_nonblocked_csr(U&& data, const V& cols, const W& row_ptr,
}
}
//-----------------------------------------------------------------------------
+
+template <typename T, int BS1>
+void spmv(std::span<const T> values, std::span<const std::int64_t> row_begin,
+          std::span<const std::int64_t> row_end,
+          std::span<const std::int32_t> indices, std::span<const T> x,
+          std::span<T> y, int bs0, int bs1)
+{
+  assert(row_begin.size() == row_end.size());
+
+  for (int k0 = 0; k0 < bs0; ++k0)
+  {
+    for (std::size_t i = 0; i < row_begin.size(); i++)
+    {
+      T vi{0};
+      for (std::int32_t j = row_begin[i]; j < row_end[i]; j++)
+      {
+        if constexpr (BS1 == -1)
+        {
+          for (int k1 = 0; k1 < bs1; ++k1)
+          {
+            vi += values[j * bs1 * bs0 + k1 * bs0 + k0]
+                  * x[indices[j] * bs1 + k1];
+          }
+        }
+        else
+        {
+          for (int k1 = 0; k1 < BS1; ++k1)
+          {
+            vi += values[j * BS1 * bs0 + k1 * bs0 + k0]
+                  * x[indices[j] * BS1 + k1];
+          }
+        }
+      }
+
+      y[i * bs0 + k0] += vi;
+    }
+  }
+}
+
+} // namespace impl
} // namespace dolfinx::la
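The `BS1` template parameter lets the common unit block-size case be
specialised at compile time, while `BS1 == -1` falls back to the runtime
`bs1`. A tiny serial check of the kernel's semantics, for the 2x2 matrix
[[1, 2], [0, 3]] with bs0 = bs1 = 1 (a sketch only: `impl::` functions are
internal, the include path is an assumption, and calling the kernel directly
is purely illustrative):

#include <cassert>
#include <cstdint>
#include <span>
#include <vector>
#include <dolfinx/la/matrix_csr_impl.h>

int main()
{
  std::vector<double> values{1.0, 2.0, 3.0};  // non-zeros, row-major CSR
  std::vector<std::int32_t> cols{0, 1, 1};    // column of each non-zero
  std::vector<std::int64_t> row_ptr{0, 2, 3}; // row offsets
  std::span<const std::int64_t> begin(row_ptr.data(), 2);
  std::span<const std::int64_t> end(row_ptr.data() + 1, 2);
  std::vector<double> x{1.0, 1.0}, y{0.0, 0.0};
  dolfinx::la::impl::spmv<double, 1>(values, begin, end, cols, x, y, 1, 1);
  assert(y[0] == 3.0 && y[1] == 3.0); // y = A*(1, 1) = (3, 3)
  return 0;
}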
4 changes: 3 additions & 1 deletion cpp/test/CMakeLists.txt
@@ -51,11 +51,13 @@ add_executable(
${CMAKE_CURRENT_BINARY_DIR}/poisson.c
)
target_link_libraries(unittests PRIVATE Catch2::Catch2WithMain dolfinx)
+
# UUID requires bcrypt to be linked on Windows, broken in vcpkg.
# https://github.com/microsoft/vcpkg/issues/4481
if(WIN32)
target_link_libraries(unittests PRIVATE bcrypt)
endif()
+
target_include_directories(
unittests PRIVATE $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>
)
@@ -71,7 +73,7 @@ endif()
target_compile_options(
unittests
PRIVATE
-    $<$<AND:$<CONFIG:Developer>,$<COMPILE_LANGUAGE:CXX>>:${DOLFINX_CXX_DEVELOPER_FLAGS}>
+  $<$<AND:$<CONFIG:Developer>,$<COMPILE_LANGUAGE:CXX>>:${DOLFINX_CXX_DEVELOPER_FLAGS}>
)

# Enable testing
84 changes: 1 addition & 83 deletions cpp/test/matrix.cpp
@@ -23,88 +23,6 @@ using namespace dolfinx;

namespace
{
-/// Computes y += A*x for a local CSR matrix A and local dense vectors x,y
-/// @param[in] values Nonzero values of A
-/// @param[in] row_begin First index of each row in the arrays values and
-/// indices.
-/// @param[in] row_end Last index of each row in the arrays values and indices.
-/// @param[in] indices Column indices for each non-zero element of the matrix A
-/// @param[in] x Input vector
-/// @param[in, out] x Output vector
-template <typename T>
-void spmv_impl(std::span<const T> values,
-               std::span<const std::int64_t> row_begin,
-               std::span<const std::int64_t> row_end,
-               std::span<const std::int32_t> indices, std::span<const T> x,
-               std::span<T> y)
-{
-  assert(row_begin.size() == row_end.size());
-  for (std::size_t i = 0; i < row_begin.size(); i++)
-  {
-    double vi{0};
-    for (std::int32_t j = row_begin[i]; j < row_end[i]; j++)
-      vi += values[j] * x[indices[j]];
-    y[i] += vi;
-  }
-}
-
-// The matrix A is distributed across P processes by blocks of rows:
-//  A = | A_0   |
-//      | A_1   |
-//      | ...   |
-//      | A_P-1 |
-//
-// Each submatrix A_i is owned by a single process "i" and can be further
-// decomposed into diagonal (Ai[0]) and off diagonal (Ai[1]) blocks:
-//  Ai = |Ai[0] Ai[1]|
-//
-// If A is square, the diagonal block Ai[0] is also square and contains
-// only owned columns and rows. The block Ai[1] contains ghost columns
-// (unowned dofs).
-
-// Likewise, a local vector x can be decomposed into owned and ghost blocks:
-//  xi = | x[0] |
-//       | x[1] |
-//
-// So the product y = Ax can be computed into two separate steps:
-//  y[0] = |Ai[0] Ai[1]| | x[0] | = Ai[0] x[0] + Ai[1] x[1]
-//                       | x[1] |
-//
-/// Computes y += A*x for a parallel CSR matrix A and parallel dense vectors x,y
-/// @param[in] A Parallel CSR matrix
-/// @param[in] x Input vector
-/// @param[in, out] y Output vector
-template <typename T>
-void spmv(la::MatrixCSR<T>& A, la::Vector<T>& x, la::Vector<T>& y)
-{
-  // start communication (update ghosts)
-  x.scatter_fwd_begin();
-
-  const std::int32_t nrowslocal = A.num_owned_rows();
-  std::span<const std::int64_t> row_ptr(A.row_ptr().data(), nrowslocal + 1);
-  std::span<const std::int32_t> cols(A.cols().data(), row_ptr[nrowslocal]);
-  std::span<const std::int64_t> off_diag_offset(A.off_diag_offset().data(),
-                                                nrowslocal);
-  std::span<const T> values(A.values().data(), row_ptr[nrowslocal]);
-
-  std::span<const T> _x = x.array();
-  std::span<T> _y = y.mutable_array();
-
-  std::span<const std::int64_t> row_begin(row_ptr.data(), nrowslocal);
-  std::span<const std::int64_t> row_end(row_ptr.data() + 1, nrowslocal);
-
-  // First stage: spmv - diagonal
-  // yi[0] += Ai[0] * xi[0]
-  spmv_impl<T>(values, row_begin, off_diag_offset, cols, _x, _y);
-
-  // finalize ghost update
-  x.scatter_fwd_end();
-
-  // Second stage: spmv - off-diagonal
-  // yi[0] += Ai[1] * xi[1]
-  spmv_impl<T>(values, off_diag_offset, row_end, cols, _x, _y);
-}
-
/// @brief Create a matrix operator
/// @param comm The communicator to build the matrix on
/// @return The assembled matrix
@@ -195,7 +113,7 @@ la::MatrixCSR<double> create_operator(MPI_Comm comm)

// Matrix A represents the action of the Laplace operator, so when
// applied to a constant vector the result should be zero
-spmv(A, x, y);
+A.mult(x, y);

std::ranges::for_each(y.array(),
[](auto a) { REQUIRE(std::abs(a) < 1e-13); });
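The zero check rests on a standard identity (background, not from this
commit): the P1 basis functions \f$\phi_j\f$ form a partition of unity, so
every row of the assembled stiffness matrix sums to zero,

\f[
(A \mathbf{1})_i = \sum_j A_{ij}
  = \int_\Omega \nabla \phi_i \cdot \nabla \Big( \sum_j \phi_j \Big) \, {\rm d}x
  = \int_\Omega \nabla \phi_i \cdot \nabla 1 \, {\rm d}x = 0.
\f]

Hence `A.mult` applied to the constant-one vector must return zero up to
round-off, which is what the `1e-13` tolerance asserts.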