Commit ec4c80a

Merge branch 'main' into garth/docker-bump

garth-wells authored Jan 11, 2025
2 parents e02d141 + 7dee60c commit ec4c80a

Showing 36 changed files with 824 additions and 518 deletions.
6 changes: 4 additions & 2 deletions .github/workflows/ccpp.yml
@@ -78,7 +78,7 @@ jobs:
find . -type f \( -name "*.cmake" -o -name "*.cmake.in" -o -name "CMakeLists.txt" \) | xargs cmake-format --check
build:
-runs-on: ubuntu-latest
+runs-on: ubuntu-24.04
needs: [fenicsx-refs, lint]
env:
OMPI_ALLOW_RUN_AS_ROOT: 1
@@ -93,7 +93,9 @@ jobs:
- name: Install dependencies
run: |
sudo apt-get update
-sudo apt-get install catch2 cmake g++ libboost-dev libhdf5-mpi-dev libparmetis-dev libpugixml-dev libspdlog-dev mpi-default-dev ninja-build pkg-config
+sudo apt-get install catch2 cmake g++ libblas-dev libboost-dev libhdf5-mpi-dev \
+  liblapack-dev libparmetis-dev libpugixml-dev libspdlog-dev mpi-default-dev \
+  ninja-build pkg-config
- name: Set up Python
uses: actions/setup-python@v5
with:
2 changes: 1 addition & 1 deletion .github/workflows/macos.yml
@@ -20,7 +20,7 @@ jobs:

mac-os-build:
name: macOS Homebrew install and test
-runs-on: macos-14
+runs-on: macos-15
needs: fenicsx-refs
env:
PETSC_ARCH: arch-darwin-c-opt
2 changes: 2 additions & 0 deletions cpp/dolfinx/fem/assemble_vector_impl.h
@@ -1295,8 +1295,10 @@ void assemble_vector(
std::shared_ptr<const mesh::Mesh<U>> mesh = L.mesh();
assert(mesh);
if constexpr (std::is_same_v<U, scalar_value_type_t<T>>)
+{
assemble_vector(b, L, mesh->geometry().dofmap(), mesh->geometry().x(),
constants, coefficients);
+}
else
{
auto x = mesh->geometry().x();
18 changes: 9 additions & 9 deletions cpp/dolfinx/fem/discreteoperators.h
@@ -28,12 +28,12 @@ namespace dolfinx::fem
/// a Lagrange finite element function in \f$V_0 \subset H^1\f$ into a
/// Nédélec (first kind) space \f$V_1 \subset H({\rm curl})\f$, i.e.
/// \f$\nabla V_0 \rightarrow V_1\f$. If \f$u_0\f$ is the
-/// degree-of-freedom vector associated with \f$V_0\f$, the hen
+/// degree-of-freedom vector associated with \f$V_0\f$, then
/// \f$u_1=Au_0\f$ where \f$u_1\f$ is the degrees-of-freedom vector for
/// interpolating function in the \f$H({\rm curl})\f$ space. An example
/// of where discrete gradient operators are used is the creation of
-/// algebraic multigrid solvers for \f$H({\rm curl})\f$ and
-/// \f$H({\rm div})\f$ problems.
+/// algebraic multigrid solvers for \f$H({\rm curl})\f$ and \f$H({\rm
+/// div})\f$ problems.
///
/// @note The sparsity pattern for a discrete operator can be
/// initialised using sparsitybuild::cells. The space `V1` should be
@@ -44,9 +44,9 @@ namespace dolfinx::fem
///
/// @param[in] topology Mesh topology
/// @param[in] V0 Lagrange element and dofmap for corresponding space to
-/// interpolate the gradient from
-/// @param[in] V1 Nédélec (first kind) element and and dofmap for
-/// corresponding space to interpolate into
+/// interpolate the gradient from.
+/// @param[in] V1 Nédélec (first kind) element and dofmap for
+/// corresponding space to interpolate into.
/// @param[in] mat_set A functor that sets values in a matrix
template <dolfinx::scalar T,
std::floating_point U = dolfinx::scalar_value_type_t<T>>
@@ -148,9 +148,9 @@ void discrete_gradient(mesh::Topology& topology,
/// initialised using sparsitybuild::cells. The space `V1` should be
/// used for the rows of the sparsity pattern, `V0` for the columns.
///
-/// @param[in] V0 The space to interpolate from
-/// @param[in] V1 The space to interpolate to
-/// @param[in] mat_set A functor that sets values in a matrix
+/// @param[in] V0 Space to interpolate from.
+/// @param[in] V1 Space to interpolate to.
+/// @param[in] mat_set Functor that sets values in a matrix.
template <dolfinx::scalar T, std::floating_point U>
void interpolation_matrix(const FunctionSpace<U>& V0,
const FunctionSpace<U>& V1, auto&& mat_set)
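For background (not part of this commit), a minimal sketch of how the
`interpolation_matrix` interface documented above can be driven. It assumes
`V0` and `V1` are existing `FunctionSpace<double>` objects, that `sp` is a
`la::SparsityPattern` built as the note above prescribes (rows from `V1`,
columns from `V0`), and that `mat_set_values()` is the insertion functor
exposed by `la::MatrixCSR`:

// Sketch only; V0, V1 and sp are assumed to exist as described above.
la::MatrixCSR<double> A(sp);
fem::interpolation_matrix<double, double>(V0, V1, A.mat_set_values());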
83 changes: 83 additions & 0 deletions cpp/dolfinx/la/MatrixCSR.h
@@ -7,6 +7,7 @@
#pragma once

#include "SparsityPattern.h"
#include "Vector.h"
#include "matrix_csr_impl.h"
#include <algorithm>
#include <dolfinx/common/IndexMap.h>
@@ -322,6 +323,15 @@ class MatrixCSR
/// @note MPI Collective
double squared_norm() const;

+/// @brief Compute the product `y += Ax`.
+///
+/// The vectors `x` and `y` must have parallel layouts that are
+/// compatible with `A`.
+///
+/// @param[in] x Vector to apply `A` to.
+/// @param[in,out] y Vector to accumulate the result into.
+void mult(Vector<value_type>& x, Vector<value_type>& y);

/// @brief Index maps for the row and column space.
///
/// The row IndexMap contains ghost entries for rows which may be
@@ -734,4 +744,77 @@ double MatrixCSR<U, V, W, X>::squared_norm() const
}
//-----------------------------------------------------------------------------

+// The matrix A is distributed across P processes by blocks of rows:
+//  A = | A_0   |
+//      | A_1   |
+//      | ...   |
+//      | A_P-1 |
+//
+// Each submatrix A_i is owned by a single process "i" and can be further
+// decomposed into diagonal (Ai[0]) and off-diagonal (Ai[1]) blocks:
+//  Ai = |Ai[0] Ai[1]|
+//
+// If A is square, the diagonal block Ai[0] is also square and contains
+// only owned columns and rows. The block Ai[1] contains ghost columns
+// (unowned dofs).
+
+// Likewise, a local vector x can be decomposed into owned and ghost blocks:
+//  xi = | x[0] |
+//       | x[1] |
+//
+// So the product y = Ax can be computed in two separate steps:
+//  y[0] = |Ai[0] Ai[1]| | x[0] | = Ai[0] x[0] + Ai[1] x[1]
+//                       | x[1] |
+//
+/// Computes y += A*x for a parallel CSR matrix A and parallel dense vectors x,y
+template <typename Scalar, typename V, typename W, typename X>
+void MatrixCSR<Scalar, V, W, X>::mult(la::Vector<Scalar>& x,
+                                      la::Vector<Scalar>& y)
+{
+  // start communication (update ghosts)
+  x.scatter_fwd_begin();
+
+  const std::int32_t nrowslocal = num_owned_rows();
+  std::span<const std::int64_t> Arow_ptr(row_ptr().data(), nrowslocal + 1);
+  std::span<const std::int32_t> Acols(cols().data(), Arow_ptr[nrowslocal]);
+  std::span<const std::int64_t> Aoff_diag_offset(off_diag_offset().data(),
+                                                 nrowslocal);
+  std::span<const Scalar> Avalues(values().data(), Arow_ptr[nrowslocal]);
+
+  std::span<const Scalar> _x = x.array();
+  std::span<Scalar> _y = y.mutable_array();
+
+  std::span<const std::int64_t> Arow_begin(Arow_ptr.data(), nrowslocal);
+  std::span<const std::int64_t> Arow_end(Arow_ptr.data() + 1, nrowslocal);
+
+  // First stage: spmv - diagonal
+  // yi[0] += Ai[0] * xi[0]
+  if (_bs[1] == 1)
+  {
+    impl::spmv<Scalar, 1>(Avalues, Arow_begin, Aoff_diag_offset, Acols, _x, _y,
+                          _bs[0], 1);
+  }
+  else
+  {
+    impl::spmv<Scalar, -1>(Avalues, Arow_begin, Aoff_diag_offset, Acols, _x,
+                           _y, _bs[0], _bs[1]);
+  }
+
+  // finalize ghost update
+  x.scatter_fwd_end();
+
+  // Second stage: spmv - off-diagonal
+  // yi[0] += Ai[1] * xi[1]
+  if (_bs[1] == 1)
+  {
+    impl::spmv<Scalar, 1>(Avalues, Aoff_diag_offset, Arow_end, Acols, _x, _y,
+                          _bs[0], 1);
+  }
+  else
+  {
+    impl::spmv<Scalar, -1>(Avalues, Aoff_diag_offset, Arow_end, Acols, _x, _y,
+                           _bs[0], _bs[1]);
+  }
+}
} // namespace dolfinx::la
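A short usage sketch for the new member (not part of this commit; it mirrors
the updated unit test further down). Here `A` is assumed to be an assembled,
square `la::MatrixCSR<double>`, and `index_map`/`block_size` are assumed to
be the accessors documented elsewhere in this class:

// Sketch: y += A*x with layouts taken from the matrix itself.
la::Vector<double> x(A.index_map(1), A.block_size()[1]);
la::Vector<double> y(A.index_map(0), A.block_size()[0]);
x.set(1.0); // x = (1, 1, ..., 1)
y.set(0.0);
A.mult(x, y); // y now holds the row sums of A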
64 changes: 50 additions & 14 deletions cpp/dolfinx/la/matrix_csr_impl.h
@@ -100,14 +100,12 @@ void insert_nonblocked_csr(
const X& x, const Y& xrows, const Y& xcols, OP op,
typename Y::value_type num_rows, int bs0, int bs1);

-} // namespace impl

//-----------------------------------------------------------------------------
template <int BS0, int BS1, typename OP, typename U, typename V, typename W,
typename X, typename Y>
-void impl::insert_csr(U&& data, const V& cols, const W& row_ptr, const X& x,
-                      const Y& xrows, const Y& xcols, OP op,
-                      [[maybe_unused]] typename Y::value_type num_rows)
+void insert_csr(U&& data, const V& cols, const W& row_ptr, const X& x,
+                const Y& xrows, const Y& xcols, OP op,
+                [[maybe_unused]] typename Y::value_type num_rows)
{
const std::size_t nc = xcols.size();
assert(x.size() == xrows.size() * xcols.size() * BS0 * BS1);
@@ -151,9 +149,9 @@ void impl::insert_csr(U&& data, const V& cols, const W& row_ptr, const X& x,
// Insert with block insertion into a regular CSR (block size 1)
template <int BS0, int BS1, typename OP, typename U, typename V, typename W,
typename X, typename Y>
-void impl::insert_blocked_csr(U&& data, const V& cols, const W& row_ptr,
-                              const X& x, const Y& xrows, const Y& xcols, OP op,
-                              [[maybe_unused]] typename Y::value_type num_rows)
+void insert_blocked_csr(U&& data, const V& cols, const W& row_ptr, const X& x,
+                        const Y& xrows, const Y& xcols, OP op,
+                        [[maybe_unused]] typename Y::value_type num_rows)
{
const std::size_t nc = xcols.size();
assert(x.size() == xrows.size() * xcols.size() * BS0 * BS1);
@@ -195,12 +193,10 @@ void impl::insert_blocked_csr(U&& data, const V& cols, const W& row_ptr,
// Add individual entries in block-CSR storage
template <typename OP, typename U, typename V, typename W, typename X,
typename Y>
-void impl::insert_nonblocked_csr(U&& data, const V& cols, const W& row_ptr,
-                                 const X& x, const Y& xrows, const Y& xcols,
-                                 OP op,
-                                 [[maybe_unused]]
-                                 typename Y::value_type num_rows,
-                                 int bs0, int bs1)
+void insert_nonblocked_csr(U&& data, const V& cols, const W& row_ptr,
+                           const X& x, const Y& xrows, const Y& xcols, OP op,
+                           [[maybe_unused]] typename Y::value_type num_rows,
+                           int bs0, int bs1)
{
const std::size_t nc = xcols.size();
const int nbs = bs0 * bs1;
@@ -237,4 +233,44 @@ void impl::insert_nonblocked_csr(U&& data, const V& cols, const W& row_ptr,
}
}
//-----------------------------------------------------------------------------
+
+template <typename T, int BS1>
+void spmv(std::span<const T> values, std::span<const std::int64_t> row_begin,
+          std::span<const std::int64_t> row_end,
+          std::span<const std::int32_t> indices, std::span<const T> x,
+          std::span<T> y, int bs0, int bs1)
+{
+  assert(row_begin.size() == row_end.size());
+
+  for (int k0 = 0; k0 < bs0; ++k0)
+  {
+    for (std::size_t i = 0; i < row_begin.size(); i++)
+    {
+      T vi{0};
+      for (std::int32_t j = row_begin[i]; j < row_end[i]; j++)
+      {
+        if constexpr (BS1 == -1)
+        {
+          for (int k1 = 0; k1 < bs1; ++k1)
+          {
+            vi += values[j * bs1 * bs0 + k1 * bs0 + k0]
+                  * x[indices[j] * bs1 + k1];
+          }
+        }
+        else
+        {
+          for (int k1 = 0; k1 < BS1; ++k1)
+          {
+            vi += values[j * BS1 * bs0 + k1 * bs0 + k0]
+                  * x[indices[j] * BS1 + k1];
+          }
+        }
+      }
+
+      y[i * bs0 + k0] += vi;
+    }
+  }
+}
+
+} // namespace impl
} // namespace dolfinx::la
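The `BS1` template parameter lets the common unit block-size case be
specialised at compile time, while `BS1 == -1` falls back to the runtime
`bs1`. A tiny serial check of the kernel's semantics, for the 2x2 matrix
[[1, 2], [0, 3]] with bs0 = bs1 = 1 (a sketch only: `impl::` functions are
internal, the include path is an assumption, and calling the kernel directly
is purely illustrative):

#include <cassert>
#include <cstdint>
#include <span>
#include <vector>
#include <dolfinx/la/matrix_csr_impl.h>

int main()
{
  std::vector<double> values{1.0, 2.0, 3.0};  // non-zeros, row-major CSR
  std::vector<std::int32_t> cols{0, 1, 1};    // column of each non-zero
  std::vector<std::int64_t> row_ptr{0, 2, 3}; // row offsets
  std::span<const std::int64_t> begin(row_ptr.data(), 2);
  std::span<const std::int64_t> end(row_ptr.data() + 1, 2);
  std::vector<double> x{1.0, 1.0}, y{0.0, 0.0};
  dolfinx::la::impl::spmv<double, 1>(values, begin, end, cols, x, y, 1, 1);
  assert(y[0] == 3.0 && y[1] == 3.0); // y = A*(1, 1) = (3, 3)
  return 0;
}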
4 changes: 3 additions & 1 deletion cpp/test/CMakeLists.txt
@@ -51,11 +51,13 @@ add_executable(
${CMAKE_CURRENT_BINARY_DIR}/poisson.c
)
target_link_libraries(unittests PRIVATE Catch2::Catch2WithMain dolfinx)
+
# UUID requires bcrypt to be linked on Windows, broken in vcpkg.
# https://github.com/microsoft/vcpkg/issues/4481
if(WIN32)
target_link_libraries(unittests PRIVATE bcrypt)
endif()
+
target_include_directories(
unittests PRIVATE $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>
)
@@ -71,7 +73,7 @@ endif()
target_compile_options(
unittests
PRIVATE
-    $<$<AND:$<CONFIG:Developer>,$<COMPILE_LANGUAGE:CXX>>:${DOLFINX_CXX_DEVELOPER_FLAGS}>
+  $<$<AND:$<CONFIG:Developer>,$<COMPILE_LANGUAGE:CXX>>:${DOLFINX_CXX_DEVELOPER_FLAGS}>
)

# Enable testing
84 changes: 1 addition & 83 deletions cpp/test/matrix.cpp
@@ -23,88 +23,6 @@ using namespace dolfinx;

namespace
{
-/// Computes y += A*x for a local CSR matrix A and local dense vectors x,y
-/// @param[in] values Nonzero values of A
-/// @param[in] row_begin First index of each row in the arrays values and
-/// indices.
-/// @param[in] row_end Last index of each row in the arrays values and indices.
-/// @param[in] indices Column indices for each non-zero element of the matrix A
-/// @param[in] x Input vector
-/// @param[in, out] x Output vector
-template <typename T>
-void spmv_impl(std::span<const T> values,
-               std::span<const std::int64_t> row_begin,
-               std::span<const std::int64_t> row_end,
-               std::span<const std::int32_t> indices, std::span<const T> x,
-               std::span<T> y)
-{
-  assert(row_begin.size() == row_end.size());
-  for (std::size_t i = 0; i < row_begin.size(); i++)
-  {
-    double vi{0};
-    for (std::int32_t j = row_begin[i]; j < row_end[i]; j++)
-      vi += values[j] * x[indices[j]];
-    y[i] += vi;
-  }
-}
-
-// The matrix A is distributed across P processes by blocks of rows:
-//  A = | A_0   |
-//      | A_1   |
-//      | ...   |
-//      | A_P-1 |
-//
-// Each submatrix A_i is owned by a single process "i" and can be further
-// decomposed into diagonal (Ai[0]) and off diagonal (Ai[1]) blocks:
-//  Ai = |Ai[0] Ai[1]|
-//
-// If A is square, the diagonal block Ai[0] is also square and contains
-// only owned columns and rows. The block Ai[1] contains ghost columns
-// (unowned dofs).
-
-// Likewise, a local vector x can be decomposed into owned and ghost blocks:
-//  xi = | x[0] |
-//       | x[1] |
-//
-// So the product y = Ax can be computed into two separate steps:
-//  y[0] = |Ai[0] Ai[1]| | x[0] | = Ai[0] x[0] + Ai[1] x[1]
-//                       | x[1] |
-//
-/// Computes y += A*x for a parallel CSR matrix A and parallel dense vectors x,y
-/// @param[in] A Parallel CSR matrix
-/// @param[in] x Input vector
-/// @param[in, out] y Output vector
-template <typename T>
-void spmv(la::MatrixCSR<T>& A, la::Vector<T>& x, la::Vector<T>& y)
-{
-  // start communication (update ghosts)
-  x.scatter_fwd_begin();
-
-  const std::int32_t nrowslocal = A.num_owned_rows();
-  std::span<const std::int64_t> row_ptr(A.row_ptr().data(), nrowslocal + 1);
-  std::span<const std::int32_t> cols(A.cols().data(), row_ptr[nrowslocal]);
-  std::span<const std::int64_t> off_diag_offset(A.off_diag_offset().data(),
-                                                nrowslocal);
-  std::span<const T> values(A.values().data(), row_ptr[nrowslocal]);
-
-  std::span<const T> _x = x.array();
-  std::span<T> _y = y.mutable_array();
-
-  std::span<const std::int64_t> row_begin(row_ptr.data(), nrowslocal);
-  std::span<const std::int64_t> row_end(row_ptr.data() + 1, nrowslocal);
-
-  // First stage: spmv - diagonal
-  // yi[0] += Ai[0] * xi[0]
-  spmv_impl<T>(values, row_begin, off_diag_offset, cols, _x, _y);
-
-  // finalize ghost update
-  x.scatter_fwd_end();
-
-  // Second stage: spmv - off-diagonal
-  // yi[0] += Ai[1] * xi[1]
-  spmv_impl<T>(values, off_diag_offset, row_end, cols, _x, _y);
-}
-
/// @brief Create a matrix operator
/// @param comm The communicator to build the matrix on
/// @return The assembled matrix
@@ -195,7 +113,7 @@ la::MatrixCSR<double> create_operator(MPI_Comm comm)

// Matrix A represents the action of the Laplace operator, so when
// applied to a constant vector the result should be zero
-spmv(A, x, y);
+A.mult(x, y);

std::ranges::for_each(y.array(),
[](auto a) { REQUIRE(std::abs(a) < 1e-13); });
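The zero check rests on a standard identity (background, not from this
commit): the P1 basis functions \f$\phi_j\f$ form a partition of unity, so
every row of the assembled stiffness matrix sums to zero,

\f[
(A \mathbf{1})_i = \sum_j A_{ij}
  = \int_\Omega \nabla \phi_i \cdot \nabla \Big( \sum_j \phi_j \Big) \, {\rm d}x
  = \int_\Omega \nabla \phi_i \cdot \nabla 1 \, {\rm d}x = 0.
\f]

Hence `A.mult` applied to the constant-one vector must return zero up to
round-off, which is what the `1e-13` tolerance asserts.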