Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add in-place LU decomposition with Doolittle algorithm #705

Merged
merged 3 commits into from
Jan 9, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions include/micm/solver/lu_decomposition.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

#include "lu_decomposition_doolittle.hpp"
#include "lu_decomposition_mozart.hpp"
#include "lu_decomposition_doolittle_in_place.hpp"
#include "lu_decomposition_mozart_in_place.hpp"

#include <micm/util/sparse_matrix.hpp>
Expand All @@ -26,6 +27,14 @@ namespace micm
LuDecompositionMozartInPlace,
SparseMatrix<double, SparseMatrixVectorOrderingCompressedSparseRow<1>>>,
"LuDecompositionMozartInPlace for vector matrices does not meet the LuDecompositionInPlaceConcept requirements");
static_assert(
LuDecompositionInPlaceConcept<LuDecompositionDoolittleInPlace, StandardSparseMatrix>,
"LuDecompositionDoolittleInPlace does not meet the LuDecompositionInPlaceConcept requirements");
static_assert(
LuDecompositionInPlaceConcept<
LuDecompositionDoolittleInPlace,
SparseMatrix<double, SparseMatrixVectorOrderingCompressedSparseRow<1>>>,
"LuDecompositionDoolittleInPlace for vector matrices does not meet the LuDecompositionInPlaceConcept requirements");

/// @brief Alias for the default LU decomposition algorithm
using LuDecomposition = LuDecompositionDoolittle;
Expand Down
106 changes: 106 additions & 0 deletions include/micm/solver/lu_decomposition_doolittle_in_place.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
// Copyright (C) 2023-2024 National Center for Atmospheric Research
// SPDX-License-Identifier: Apache-2.0
#pragma once

#include <micm/profiler/instrumentation.hpp>
#include <micm/util/sparse_matrix.hpp>

namespace micm
{

/// @brief LU decomposer for SparseMatrix following the Doolittle algorithm
///
/// The LU decomposition uses the Doolittle algorithm following the
/// naming used here: https://www.geeksforgeeks.org/doolittle-algorithm-lu-decomposition/
///
/// The sudo-code for the corresponding dense matrix algorithm for matrix A (in-line) would be:
///
/// for i = 0...n-1 // Outer loop over rows (columns) for upper (lower) triangular matrix
/// for k = i...n-1 // Middle loop over columns for upper triangular matrix
/// for j = 0...i-1 // Inner loop over columns (rows) for lower (upper) triangular matrix
/// A[i][k] -= A[i][j] * A[j][k]
/// for k = i+1...n-1 // Middle loop over rows for lower triangular matrix
/// for j = 0...i-1 // Inner loop over columns (rows) for lower (upper) triangular matrix
/// A[k][i] -= A[k][j] * A[j][i];
/// A[k][i] /= A[i][i]
///
/// For the sparse matrix algorithm, the indices of non-zero terms are stored in
/// several arrays during construction. These arrays are iterated through during
/// calls to Decompose to do the actual decomposition.
/// Our LU Decomposition only assigns the values of the jacobian to the LU matrices
/// when the *jacobian* is nonzero. However, the sparsity pattern of the jacobian doesn't
/// necessarily match that of the LU matrices. There can be more nonzero elements in the LU matrices
/// than in the jacobian. It is expected that the elements of the L and U matrices that are zero in the A matrix
/// will be set to zero before the combined matrix is passed to the decomposition function.
class LuDecompositionDoolittleInPlace
{
protected:
/// Number of elements in the middle (k) loops for lower and upper triangular matrices, respectively,
/// and the index in A.data_ for A[i][i] for each iteration of the outer (i) loop
std::vector<std::tuple<std::size_t, std::size_t, std::size_t>> nik_nki_aii_;
/// Index in A.data_ for A[i][k] for each iteration of the upper middle (k) loop and the
/// number of elements in the inner (j) loop for each upper (k) element used to set A[i][k]
std::vector<std::pair<std::size_t, std::size_t>> aik_njk_;
/// Index in A.data_ for A[i][j] and A[j][k] for each iteration of the upper inner (j) loop
std::vector<std::pair<std::size_t, std::size_t>> aij_ajk_;
/// Index in A.data_ for A[k][i] for each iteration of the lower middle (k) loop, the
/// number of elements in the inner (j) loop for each lower (k) element used to set A[k][i]
std::vector<std::pair<std::size_t, std::size_t>> aki_nji_;
/// Index in A.data_ for A[k][j] and A[j][i] for each iteration of the lower inner (j) loop
std::vector<std::pair<std::size_t, std::size_t>> akj_aji_;

public:
/// @brief default constructor
LuDecompositionDoolittleInPlace();

LuDecompositionDoolittleInPlace(const LuDecompositionDoolittleInPlace&) = delete;
LuDecompositionDoolittleInPlace& operator=(const LuDecompositionDoolittleInPlace&) = delete;

LuDecompositionDoolittleInPlace(LuDecompositionDoolittleInPlace&& other) = default;
LuDecompositionDoolittleInPlace& operator=(LuDecompositionDoolittleInPlace&&) = default;

/// @brief Construct an LU decomposition algorithm for a given sparse matrix
/// @param matrix Sparse matrix
template<class SparseMatrixPolicy>
requires(SparseMatrixConcept<SparseMatrixPolicy>)
LuDecompositionDoolittleInPlace(const SparseMatrixPolicy& matrix);

~LuDecompositionDoolittleInPlace() = default;

/// @brief Create an LU decomposition algorithm for a given sparse matrix policy
/// @param matrix Sparse matrix
template<class SparseMatrixPolicy>
requires(SparseMatrixConcept<SparseMatrixPolicy>)
static LuDecompositionDoolittleInPlace Create(const SparseMatrixPolicy& matrix);

/// @brief Create sparse L and U matrices for a given A matrix
/// @param A Sparse matrix that will be decomposed
/// @return L and U Sparse matrices
template<class SparseMatrixPolicy>
requires(SparseMatrixConcept<SparseMatrixPolicy>)
static SparseMatrixPolicy GetLUMatrix(
const SparseMatrixPolicy& A,
typename SparseMatrixPolicy::value_type initial_value);

/// @brief Perform an LU decomposition on a given A matrix
/// @param A Sparse matrix to decompose
/// @param L The lower triangular matrix created by decomposition
/// @param U The upper triangular matrix created by decomposition
template<class SparseMatrixPolicy>
requires(!VectorizableSparse<SparseMatrixPolicy>)
void Decompose(SparseMatrixPolicy& ALU) const;
template<class SparseMatrixPolicy>
requires(VectorizableSparse<SparseMatrixPolicy>)
void Decompose(SparseMatrixPolicy& ALU) const;

protected:
/// @brief Initialize arrays for the LU decomposition
/// @param A Sparse matrix to decompose
template<class SparseMatrixPolicy>
requires(SparseMatrixConcept<SparseMatrixPolicy>)
void Initialize(const SparseMatrixPolicy& matrix, auto initial_value);
};

} // namespace micm

#include "lu_decomposition_doolittle_in_place.inl"
220 changes: 220 additions & 0 deletions include/micm/solver/lu_decomposition_doolittle_in_place.inl
Original file line number Diff line number Diff line change
@@ -0,0 +1,220 @@
// Copyright (C) 2023-2024 National Center for Atmospheric Research
// SPDX-License-Identifier: Apache-2.0

namespace micm
{

inline LuDecompositionDoolittleInPlace::LuDecompositionDoolittleInPlace()
{
}

template<class SparseMatrixPolicy>
requires(SparseMatrixConcept<SparseMatrixPolicy>)
inline LuDecompositionDoolittleInPlace::LuDecompositionDoolittleInPlace(const SparseMatrixPolicy& matrix)
{
Initialize<SparseMatrixPolicy>(matrix, typename SparseMatrixPolicy::value_type());
}

template<class SparseMatrixPolicy>
requires(SparseMatrixConcept<SparseMatrixPolicy>)
inline LuDecompositionDoolittleInPlace LuDecompositionDoolittleInPlace::Create(const SparseMatrixPolicy& matrix)
{
LuDecompositionDoolittleInPlace lu_decomp{};
lu_decomp.Initialize<SparseMatrixPolicy>(matrix, typename SparseMatrixPolicy::value_type());
return lu_decomp;
}

template<class SparseMatrixPolicy>
requires(SparseMatrixConcept<SparseMatrixPolicy>)
inline void LuDecompositionDoolittleInPlace::Initialize(const SparseMatrixPolicy& matrix, auto initial_value)
{
MICM_PROFILE_FUNCTION();

std::size_t n = matrix.NumRows();
auto ALU = GetLUMatrix<SparseMatrixPolicy>(matrix, initial_value);
for (std::size_t i = 0; i < n; ++i)
{
if (ALU.IsZero(i, i))
{
throw std::runtime_error("Diagonal element is zero in LU decomposition");
}
std::tuple<std::size_t, std::size_t, std::size_t> nik_nki_aii(0, 0, ALU.VectorIndex(0, i, i));
for (std::size_t k = i; k < n; ++k)
{
if (ALU.IsZero(i, k))
continue;
std::pair<std::size_t, std::size_t> aik_njk(ALU.VectorIndex(0, i, k), 0);
for (std::size_t j = 0; j < i; ++j)
{
if (ALU.IsZero(i, j) || ALU.IsZero(j, k))
continue;
aij_ajk_.push_back(std::make_pair(ALU.VectorIndex(0, i, j), ALU.VectorIndex(0, j, k)));
++(aik_njk.second);
}
aik_njk_.push_back(aik_njk);
++(std::get<0>(nik_nki_aii));
}
for (std::size_t k = i + 1; k < n; ++k)
{
if (ALU.IsZero(k, i))
continue;
std::pair<std::size_t, std::size_t> aki_nji(ALU.VectorIndex(0, k, i), 0);
for (std::size_t j = 0; j < i; ++j)
{
if (ALU.IsZero(k, j) || ALU.IsZero(j, i))
continue;
akj_aji_.push_back(std::make_pair(ALU.VectorIndex(0, k, j), ALU.VectorIndex(0, j, i)));
++(aki_nji.second);
}
aki_nji_.push_back(aki_nji);
++(std::get<1>(nik_nki_aii));
}
nik_nki_aii_.push_back(nik_nki_aii);
}
}

template<class SparseMatrixPolicy>
requires(SparseMatrixConcept<SparseMatrixPolicy>)
inline SparseMatrixPolicy LuDecompositionDoolittleInPlace::GetLUMatrix(
const SparseMatrixPolicy& A,
typename SparseMatrixPolicy::value_type initial_value)
{
MICM_PROFILE_FUNCTION();

std::size_t n = A.NumRows();
std::set<std::pair<std::size_t, std::size_t>> ALU_ids;
for (std::size_t i = 0; i < n; ++i)
{
// Upper triangular matrix
for (std::size_t k = i; k < n; ++k)
{
if (!A.IsZero(i, k) || k == i)
{
ALU_ids.insert(std::make_pair(i, k));
continue;
}
for (std::size_t j = 0; j < i; ++j)
{
if (ALU_ids.find(std::make_pair(i, j)) != ALU_ids.end() && ALU_ids.find(std::make_pair(j, k)) != ALU_ids.end())
{
ALU_ids.insert(std::make_pair(i, k));
break;
}
}
}
// Lower triangular matrix
for (std::size_t k = i; k < n; ++k)
{
if (!A.IsZero(k, i) || k == i)
{
ALU_ids.insert(std::make_pair(k, i));
continue;
}
for (std::size_t j = 0; j < i; ++j)
{
if (ALU_ids.find(std::make_pair(k, j)) != ALU_ids.end() && ALU_ids.find(std::make_pair(j, i)) != ALU_ids.end())
{
ALU_ids.insert(std::make_pair(k, i));
break;
}
}
}
}
auto ALU_builder = SparseMatrixPolicy::Create(n).SetNumberOfBlocks(A.NumberOfBlocks()).InitialValue(initial_value);
for (auto& pair : ALU_ids)
{
ALU_builder = ALU_builder.WithElement(pair.first, pair.second);
}
return SparseMatrixPolicy(ALU_builder);
}

template<class SparseMatrixPolicy>
requires(!VectorizableSparse<SparseMatrixPolicy>)
inline void LuDecompositionDoolittleInPlace::Decompose(SparseMatrixPolicy& ALU) const
{
MICM_PROFILE_FUNCTION();
const std::size_t n = ALU.NumRows();

// Loop over blocks
for (std::size_t i_block = 0; i_block < ALU.NumberOfBlocks(); ++i_block)
{
auto ALU_vector = std::next(ALU.AsVector().begin(), i_block * ALU.FlatBlockSize());
auto aik_njk = aik_njk_.begin();
auto aij_ajk = aij_ajk_.begin();
auto aki_nji = aki_nji_.begin();
auto akj_aji = akj_aji_.begin();

for (const auto& nik_nki_aii : nik_nki_aii_)
{
for (std::size_t ik = 0; ik < std::get<0>(nik_nki_aii); ++ik)
{
for (std::size_t jk = 0; jk < aik_njk->second; ++jk)
{
ALU_vector[aik_njk->first] -= ALU_vector[aij_ajk->first] * ALU_vector[aij_ajk->second];
++aij_ajk;
}
++aik_njk;
}
for (std::size_t ki = 0; ki < std::get<1>(nik_nki_aii); ++ki)
{
for (std::size_t ji = 0; ji < aki_nji->second; ++ji)
{
ALU_vector[aki_nji->first] -= ALU_vector[akj_aji->first] * ALU_vector[akj_aji->second];
++akj_aji;
}
ALU_vector[aki_nji->first] /= ALU_vector[std::get<2>(nik_nki_aii)];
++aki_nji;
}
}
}
}

template<class SparseMatrixPolicy>
requires(VectorizableSparse<SparseMatrixPolicy>)
inline void LuDecompositionDoolittleInPlace::Decompose(SparseMatrixPolicy& ALU) const
{
MICM_PROFILE_FUNCTION();

const std::size_t n = ALU.NumRows();
const std::size_t ALU_BlockSize = ALU.NumberOfBlocks();
constexpr std::size_t ALU_GroupVectorSize = SparseMatrixPolicy::GroupVectorSize();
const std::size_t ALU_GroupSizeOfFlatBlockSize = ALU.GroupSize();

// Loop over groups of blocks
for (std::size_t i_group = 0; i_group < ALU.NumberOfGroups(ALU_BlockSize); ++i_group)
{
auto ALU_vector = std::next(ALU.AsVector().begin(), i_group * ALU_GroupSizeOfFlatBlockSize);
const std::size_t n_cells = std::min(ALU_GroupVectorSize, ALU_BlockSize - i_group * ALU_GroupVectorSize);
auto aik_njk = aik_njk_.begin();
auto aij_ajk = aij_ajk_.begin();
auto aki_nji = aki_nji_.begin();
auto akj_aji = akj_aji_.begin();
for (const auto& nik_nki_aii : nik_nki_aii_)
{
for (std::size_t ik = 0; ik < std::get<0>(nik_nki_aii); ++ik)
{
for (std::size_t jk = 0; jk < aik_njk->second; ++jk)
{
for (std::size_t i = 0; i < n_cells; ++i)
ALU_vector[aik_njk->first + i] -= ALU_vector[aij_ajk->first + i] * ALU_vector[aij_ajk->second + i];
++aij_ajk;
}
++aik_njk;
}
for (std::size_t ki = 0; ki < std::get<1>(nik_nki_aii); ++ki)
{
for (std::size_t ji = 0; ji < aki_nji->second; ++ji)
{
for (std::size_t i = 0; i < n_cells; ++i)
ALU_vector[aki_nji->first + i] -= ALU_vector[akj_aji->first + i] * ALU_vector[akj_aji->second + i];
++akj_aji;
}
for (std::size_t i = 0; i < n_cells; ++i)
ALU_vector[aki_nji->first + i] /= ALU_vector[std::get<2>(nik_nki_aii) + i];
++aki_nji;
}
}
}
}

} // namespace micm
23 changes: 23 additions & 0 deletions include/micm/solver/solver_builder.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ namespace micm
using DenseMatrixPolicyType = DenseMatrixPolicy;
using SparseMatrixPolicyType = SparseMatrixPolicy;
using LuDecompositionPolicyType = LuDecompositionPolicy;
using LinearSolverPolicyType = LinearSolverPolicy;
using StatePolicyType = StatePolicy;

protected:
SolverParametersPolicy options_;
Expand Down Expand Up @@ -116,6 +118,8 @@ namespace micm
/// @tparam DenseMatrixPolicy Policy for dense matrices
/// @tparam SparseMatrixPolicy Policy for sparse matrices
/// @tparam LuDecompositionPolicy Policy for the LU decomposition
/// @tparam LMatrixPolicy Policy for the Lower matrix
/// @tparam UMatrixPolicy Policy for the Upper matrix
template<
class SolverParametersPolicy,
class DenseMatrixPolicy = Matrix<double>,
Expand All @@ -132,6 +136,25 @@ namespace micm
LinearSolver<SparseMatrixPolicy, LuDecompositionPolicy, LMatrixPolicy, UMatrixPolicy>,
State<DenseMatrixPolicy, SparseMatrixPolicy, LuDecompositionPolicy, LMatrixPolicy, UMatrixPolicy>>;

/// @brief Builder of CPU-based general solvers with in-place LU decomposition
/// @tparam SolverParametersPolicy Parameters for the ODE solver
/// @tparam DenseMatrixPolicy Policy for dense matrices
/// @tparam SparseMatrixPolicy Policy for sparse matrices
/// @tparam LuDecompositionPolicy Policy for the LU decomposition
template<
class SolverParametersPolicy,
class DenseMatrix = Matrix<double>,
class SparseMatrixPolicy = SparseMatrix<double, SparseMatrixStandardOrdering>,
class LuDecompositionPolicy = LuDecompositionInPlace>
using CpuSolverBuilderInPlace = SolverBuilder<
SolverParametersPolicy,
DenseMatrix,
SparseMatrixPolicy,
ProcessSet,
LuDecompositionPolicy,
LinearSolverInPlace<SparseMatrixPolicy, LuDecompositionPolicy>,
State<DenseMatrix, SparseMatrixPolicy, LuDecompositionPolicy>>;

} // namespace micm

#include "solver_builder.inl"
Loading
Loading