Skip to content

Commit

Permalink
add in-place MOZART LU decomp
Browse files Browse the repository at this point in the history
  • Loading branch information
mattldawson committed Dec 23, 2024
1 parent 1cbd28e commit 70a2956
Show file tree
Hide file tree
Showing 8 changed files with 600 additions and 4 deletions.
1 change: 1 addition & 0 deletions include/micm/solver/lu_decomposition.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

#include "lu_decomposition_doolittle.hpp"
#include "lu_decomposition_mozart.hpp"
#include "lu_decomposition_mozart_in_place.hpp"

namespace micm
{
Expand Down
99 changes: 99 additions & 0 deletions include/micm/solver/lu_decomposition_mozart_in_place.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
// Copyright (C) 2023-2024 National Center for Atmospheric Research
// SPDX-License-Identifier: Apache-2.0
#pragma once

#include <micm/profiler/instrumentation.hpp>
#include <micm/util/sparse_matrix.hpp>

namespace micm
{

/// @brief LU decomposer for SparseMatrix following the algorithm from the MOZART model
///
/// This LU decomposition uses the algorithm from the MOZART chemistry preprocessor at:
/// https://github.com/ESCOMP/CHEM_PREPROCESSOR/blob/f923081508f4264e61fcef2753a9898e55d1598e/src/cam_chempp/make_lu_fac.f#L127-L214
///
/// The MOZART function overwrote the A matrix with the L and U matrices. The pseudo-code
/// in C++ for the corresponding dense matrix algorithm for matrix A (inline change) would be:
///
/// for i = 0...n-1 // Outer loop over columns of the sparse matrix A
/// for j = i+1...n-1 // Multiply column below diagonal
/// A[j][i] = A[j][i] / A[i][i]
/// for k = i+1...n-1 // Modify sub-matrix
/// for j = i+1...n-1
/// A[j][k] = A[j][k] – A[j][i] * A[i][k]
///
/// For the sparse matrix algorithm, the indices of non-zero terms are stored in
/// several arrays during construction. These arrays are iterated through during
/// calls to Decompose to do the actual decomposition.
///
/// The GetLUMatrices function creates a new sparse matrix that includes the superset
/// of the non-zero elements in the L and U matrices. It is expected that the elements
/// of the L and U matrices that are zero in the A matrix will be set to zero before the
/// combined matrix is passed to the decomposition function.
class LuDecompositionMozartInPlace
{
protected:
/// Index in A.data_ for all diagonal elements, the number of iterations of the inner (j) loop
/// for each (i) used to set A[j][i], and the number of iterations of the middle (k) loop for
/// each (i) used to set A[j][k]
std::vector<std::tuple<std::size_t, std::size_t, std::size_t>> aii_nji_nki_;
/// Index in A.data_ for A[j][i] for each iteration of the inner (j) loop
/// used to set the value of A[j][i]
std::vector<std::size_t> aji_;
/// Index in A.data_ for A[i][k] for each iteration of the middle (k) loop,
/// and the number of iterations of the inner (j) loop for each (k) used to set A[j][k]
std::vector<std::pair<std::size_t, std::size_t>> aik_njk_;
/// Index in A.data_ for A[j][k] and A[j][i] for each iteration of the inner (j) loop
std::vector<std::pair<std::size_t, std::size_t>> ajk_aji_;

public:
/// @brief default constructor
LuDecompositionMozartInPlace();

LuDecompositionMozartInPlace(const LuDecompositionMozartInPlace&) = delete;
LuDecompositionMozartInPlace& operator=(const LuDecompositionMozartInPlace&) = delete;

LuDecompositionMozartInPlace(LuDecompositionMozartInPlace&& other) = default;
LuDecompositionMozartInPlace& operator=(LuDecompositionMozartInPlace&&) = default;

/// @brief Construct an LU decomposition algorithm for a given sparse matrix
/// @param matrix Sparse matrix
template<class SparseMatrixPolicy>
requires(SparseMatrixConcept<SparseMatrixPolicy>) LuDecompositionMozartInPlace(const SparseMatrixPolicy& matrix);

~LuDecompositionMozartInPlace() = default;

/// @brief Create an LU decomposition algorithm for a given sparse matrix policy
/// @param matrix Sparse matrix
template<class SparseMatrixPolicy>
requires(SparseMatrixConcept<SparseMatrixPolicy>) static LuDecompositionMozartInPlace Create(const SparseMatrixPolicy& matrix);

/// @brief Create a combined sparse L and U matrix for a given A matrix
/// @param A Sparse matrix that will be decomposed
/// @return combined L and U Sparse matrices
template<class SparseMatrixPolicy>
requires(SparseMatrixConcept<SparseMatrixPolicy>) static SparseMatrixPolicy GetLUMatrix(
const SparseMatrixPolicy& A,
typename SparseMatrixPolicy::value_type initial_value);

/// @brief Perform an LU decomposition on a given A matrix.
/// All elements of L and U that are zero in A should be set to zero before calling this function.
/// @param ALU Sparse matrix to decompose (will be overwritten with L and U matrices)
template<class SparseMatrixPolicy>
requires(!VectorizableSparse<SparseMatrixPolicy>) void Decompose(
SparseMatrixPolicy& ALU) const;
template<class SparseMatrixPolicy>
requires(VectorizableSparse<SparseMatrixPolicy>) void Decompose(
SparseMatrixPolicy& ALU) const;

private:
/// @brief Initialize arrays for the LU decomposition
/// @param A Sparse matrix to decompose
template<class SparseMatrixPolicy>
requires(SparseMatrixConcept<SparseMatrixPolicy>) void Initialize(const SparseMatrixPolicy& matrix, auto initial_value);
};

} // namespace micm

#include "lu_decomposition_mozart_in_place.inl"
183 changes: 183 additions & 0 deletions include/micm/solver/lu_decomposition_mozart_in_place.inl
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
// Copyright (C) 2023-2024 National Center for Atmospheric Research
// SPDX-License-Identifier: Apache-2.0

namespace micm
{

inline LuDecompositionMozartInPlace::LuDecompositionMozartInPlace()
{
}

template<class SparseMatrixPolicy>
requires(SparseMatrixConcept<SparseMatrixPolicy>) inline LuDecompositionMozartInPlace::LuDecompositionMozartInPlace(const SparseMatrixPolicy& matrix)
{
Initialize<SparseMatrixPolicy>(matrix, typename SparseMatrixPolicy::value_type());
}

template<class SparseMatrixPolicy>
requires(SparseMatrixConcept<SparseMatrixPolicy>) inline LuDecompositionMozartInPlace LuDecompositionMozartInPlace::Create(
const SparseMatrixPolicy& matrix)
{
LuDecompositionMozartInPlace lu_decomp{};
lu_decomp.Initialize<SparseMatrixPolicy>(matrix, typename SparseMatrixPolicy::value_type());
return lu_decomp;
}

template<class SparseMatrixPolicy>
requires(SparseMatrixConcept<SparseMatrixPolicy>) inline void LuDecompositionMozartInPlace::Initialize(
const SparseMatrixPolicy& matrix,
auto initial_value)
{
MICM_PROFILE_FUNCTION();

std::size_t n = matrix.NumRows();
auto ALU = GetLUMatrix<SparseMatrixPolicy>(matrix, initial_value);
for (std::size_t i = 0; i < n; ++i)
{
if (ALU.IsZero(i, i)) {
throw std::runtime_error("Diagonal element is zero in LU decomposition");
}
std::tuple<std::size_t, std::size_t, std::size_t> aii_nji_nki(ALU.VectorIndex(0, i, i), 0, 0);
for (std::size_t j = i + 1; j < n; ++j)
{
if (ALU.IsZero(j, i))
continue;
aji_.push_back(ALU.VectorIndex(0, j, i));
++(std::get<1>(aii_nji_nki));
}
for (std::size_t k = i + 1; k < n; ++k)
{
if (ALU.IsZero(i, k))
continue;
std::pair<std::size_t, std::size_t> aik_njk(ALU.VectorIndex(0, i, k), 0);
for (std::size_t j = i + 1; j < n; ++j)
{
if (ALU.IsZero(j, i))
continue;
std::pair<std::size_t, std::size_t> ajk_aji(ALU.VectorIndex(0, j, k), ALU.VectorIndex(0, j, i));
ajk_aji_.push_back(ajk_aji);
++(std::get<1>(aik_njk));
}
aik_njk_.push_back(aik_njk);
++(std::get<2>(aii_nji_nki));
}
aii_nji_nki_.push_back(aii_nji_nki);
}
}

template<class SparseMatrixPolicy>
requires(
SparseMatrixConcept<SparseMatrixPolicy>) inline SparseMatrixPolicy LuDecompositionMozartInPlace::
GetLUMatrix(const SparseMatrixPolicy& A, typename SparseMatrixPolicy::value_type initial_value)
{
MICM_PROFILE_FUNCTION();

std::size_t n = A.NumRows();
std::set<std::pair<std::size_t, std::size_t>> ALU_ids;
for (std::size_t i = 0; i < n; ++i)
{
for (std::size_t j = 0; j < n; ++j)
if (!A.IsZero(i, j))
ALU_ids.insert(std::make_pair(i, j));
}
for (std::size_t i = 0; i < n; ++i)
{
for (std::size_t j = i + 1; j < n; ++j)
if (std::find(ALU_ids.begin(), ALU_ids.end(), std::make_pair(j, i)) != ALU_ids.end())
ALU_ids.insert(std::make_pair(j, i));
for (std::size_t k = i + 1; k < n; ++k)
if (std::find(ALU_ids.begin(), ALU_ids.end(), std::make_pair(i, k)) != ALU_ids.end())
for (std::size_t j = i + 1; j < n; ++j)
if (std::find(ALU_ids.begin(), ALU_ids.end(), std::make_pair(j, i)) != ALU_ids.end())
ALU_ids.insert(std::make_pair(j, k));
}
auto ALU_builder = SparseMatrixPolicy::Create(n).SetNumberOfBlocks(A.NumberOfBlocks()).InitialValue(initial_value);
for (auto& pair : ALU_ids)
{
ALU_builder = ALU_builder.WithElement(pair.first, pair.second);
}
return SparseMatrixPolicy(ALU_builder);
}

template<class SparseMatrixPolicy>
requires(!VectorizableSparse<SparseMatrixPolicy>) inline void LuDecompositionMozartInPlace::Decompose(
SparseMatrixPolicy& ALU) const
{
MICM_PROFILE_FUNCTION();
const std::size_t n = ALU.NumRows();

// Loop over blocks
for (std::size_t i_block = 0; i_block < ALU.NumberOfBlocks(); ++i_block)
{
auto ALU_vector = std::next(ALU.AsVector().begin(), i_block * ALU.FlatBlockSize());
auto aji = aji_.begin();
auto aik_njk = aik_njk_.begin();
auto ajk_aji = ajk_aji_.begin();

for (const auto& aii_nji_nki : aii_nji_nki_)
{
const typename SparseMatrixPolicy::value_type Aii_inverse = 1.0 / ALU_vector[std::get<0>(aii_nji_nki)];
for (std::size_t ij = 0; ij < std::get<1>(aii_nji_nki); ++ij)
{
ALU_vector[*aji] *= Aii_inverse;
++aji;
}
for (std::size_t ik = 0; ik < std::get<2>(aii_nji_nki); ++ik)
{
const typename SparseMatrixPolicy::value_type Aik = ALU_vector[std::get<0>(*aik_njk)];
for (std::size_t ijk = 0; ijk < std::get<1>(*aik_njk); ++ijk)
{
ALU_vector[ajk_aji->first] -= ALU_vector[ajk_aji->second] * Aik;
++ajk_aji;
}
++aik_njk;
}
}
}
}

template<class SparseMatrixPolicy>
requires(VectorizableSparse<SparseMatrixPolicy>) inline void LuDecompositionMozartInPlace::Decompose(
SparseMatrixPolicy& ALU) const
{
MICM_PROFILE_FUNCTION();

const std::size_t n = ALU.NumRows();
const std::size_t ALU_BlockSize = ALU.NumberOfBlocks();
constexpr std::size_t ALU_GroupVectorSize = SparseMatrixPolicy::GroupVectorSize();
const std::size_t ALU_GroupSizeOfFlatBlockSize = ALU.GroupSize();
double Aii_inverse[ALU_GroupVectorSize];

// Loop over groups of blocks
for (std::size_t i_group = 0; i_group < ALU.NumberOfGroups(ALU_BlockSize); ++i_group)
{
auto ALU_vector = std::next(ALU.AsVector().begin(), i_group * ALU_GroupSizeOfFlatBlockSize);
const std::size_t n_cells = std::min(ALU_GroupVectorSize, ALU_BlockSize - i_group * ALU_GroupVectorSize);
auto aji = aji_.begin();
auto aik_njk = aik_njk_.begin();
auto ajk_aji = ajk_aji_.begin();
for (const auto& aii_nji_nki : aii_nji_nki_)
{
for (std::size_t i = 0; i < n_cells; ++i)
Aii_inverse[i] = 1.0 / ALU_vector[std::get<0>(aii_nji_nki) + i];
for (std::size_t ij = 0; ij < std::get<1>(aii_nji_nki); ++ij)
{
for (std::size_t i = 0; i < n_cells; ++i)
ALU_vector[*aji + i] *= Aii_inverse[i];
++aji;
}
for (std::size_t ik = 0; ik < std::get<2>(aii_nji_nki); ++ik)
{
const std::size_t aik = std::get<0>(*aik_njk);
for (std::size_t ijk = 0; ijk < std::get<1>(*aik_njk); ++ijk)
{
for (std::size_t i = 0; i < n_cells; ++i)
ALU_vector[ajk_aji->first + i] -= ALU_vector[ajk_aji->second + i] * ALU_vector[aik + i];
++ajk_aji;
}
++aik_njk;
}
}
}
}
} // namespace micm
1 change: 1 addition & 0 deletions test/unit/solver/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ create_standard_test(NAME chapman_ode_solver SOURCES test_chapman_ode_solver.cpp
create_standard_test(NAME linear_solver SOURCES test_linear_solver.cpp)
create_standard_test(NAME lu_decomposition_doolittle SOURCES test_lu_decomposition_doolittle.cpp)
create_standard_test(NAME lu_decomposition_mozart SOURCES test_lu_decomposition_mozart.cpp)
create_standard_test(NAME lu_decomposition_mozart_in_place SOURCES test_lu_decomposition_mozart_in_place.cpp)
create_standard_test(NAME rosenbrock SOURCES test_rosenbrock.cpp)
create_standard_test(NAME state SOURCES test_state.cpp)
create_standard_test(NAME backward_euler SOURCES test_backward_euler.cpp)
Expand Down
4 changes: 2 additions & 2 deletions test/unit/solver/test_lu_decomposition_doolittle.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ TEST(LuDecompositionDoolittle, DiagonalMatrixStandardOrdering)
testDiagonalMatrix<SparseMatrixTest, micm::LuDecompositionDoolittle>(5);
}

TEST(LuDecompositionDoolittle, AgnosticToInitialValue)
TEST(LuDecompositionDoolittle, AgnosticToInitialValueStandardOrdering)
{
double initial_values[5] = { -INFINITY, -1.0, 0.0, 1.0, INFINITY };
for (auto& value : initial_values)
Expand Down Expand Up @@ -62,7 +62,7 @@ TEST(LuDecompositionDoolittle, DiagonalMatrixVectorOrdering)
testDiagonalMatrix<Group4SparseVectorMatrix, micm::LuDecompositionDoolittle>(5);
}

TEST(LuDecompositionDoolittle, VectorOrderingAgnosticToInitialValue)
TEST(LuDecompositionDoolittle, AgnosticToInitialValueVectorOrdering)
{
double initial_values[5] = { -INFINITY, -1.0, 0.0, 1.0, INFINITY };
for (auto& value : initial_values)
Expand Down
Loading

0 comments on commit 70a2956

Please sign in to comment.