
Commit

merging main
K20shores committed Jan 13, 2025
2 parents 69fbc05 + faf21bd commit f34dfca
Showing 7 changed files with 452 additions and 46 deletions.
9 changes: 9 additions & 0 deletions include/micm/solver/lu_decomposition.hpp
@@ -3,6 +3,7 @@
#pragma once

#include "lu_decomposition_doolittle.hpp"
#include "lu_decomposition_doolittle_in_place.hpp"
#include "lu_decomposition_mozart.hpp"
#include "lu_decomposition_mozart_in_place.hpp"

@@ -26,6 +27,14 @@
LuDecompositionMozartInPlace,
SparseMatrix<double, SparseMatrixVectorOrderingCompressedSparseRow<1>>>,
"LuDecompositionMozartInPlace for vector matrices does not meet the LuDecompositionInPlaceConcept requirements");
static_assert(
LuDecompositionInPlaceConcept<LuDecompositionDoolittleInPlace, StandardSparseMatrix>,
"LuDecompositionDoolittleInPlace does not meet the LuDecompositionInPlaceConcept requirements");
static_assert(
LuDecompositionInPlaceConcept<
LuDecompositionDoolittleInPlace,
SparseMatrix<double, SparseMatrixVectorOrderingCompressedSparseRow<1>>>,
"LuDecompositionDoolittleInPlace for vector matrices does not meet the LuDecompositionInPlaceConcept requirements");

/// @brief Alias for the default LU decomposition algorithm
using LuDecomposition = LuDecompositionDoolittle;
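
The static assertions in this hunk check that each policy can be constructed from a sparse matrix and can decompose a combined LU matrix in place. The actual LuDecompositionInPlaceConcept is defined elsewhere in MICM; a minimal sketch of a concept with that shape (hypothetical, for illustration only) could be:

template<class DecompositionPolicy, class SparseMatrixPolicy>
concept ExampleInPlaceDecompositionConcept =
    requires(DecompositionPolicy decomposer, SparseMatrixPolicy matrix) {
      // constructible from the matrix whose sparsity pattern will be decomposed
      DecompositionPolicy(matrix);
      // decomposes the combined LU matrix in place
      decomposer.Decompose(matrix);
    };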
106 changes: 106 additions & 0 deletions include/micm/solver/lu_decomposition_doolittle_in_place.hpp
@@ -0,0 +1,106 @@
// Copyright (C) 2023-2024 National Center for Atmospheric Research
// SPDX-License-Identifier: Apache-2.0
#pragma once

#include <micm/profiler/instrumentation.hpp>
#include <micm/util/sparse_matrix.hpp>

namespace micm
{

/// @brief In-place LU decomposer for SparseMatrix following the Doolittle algorithm
///
/// The LU decomposition uses the Doolittle algorithm following the
/// naming used here: https://www.geeksforgeeks.org/doolittle-algorithm-lu-decomposition/
///
/// The pseudo-code for the corresponding dense-matrix, in-place algorithm for matrix A would be:
///
/// for i = 0...n-1 // Outer loop over rows (columns) for upper (lower) triangular matrix
/// for k = i...n-1 // Middle loop over columns for upper triangular matrix
/// for j = 0...i-1 // Inner loop over columns (rows) for lower (upper) triangular matrix
/// A[i][k] -= A[i][j] * A[j][k]
/// for k = i+1...n-1 // Middle loop over rows for lower triangular matrix
/// for j = 0...i-1 // Inner loop over columns (rows) for lower (upper) triangular matrix
/// A[k][i] -= A[k][j] * A[j][i];
/// A[k][i] /= A[i][i]
///
/// For the sparse matrix algorithm, the indices of non-zero terms are stored in
/// several arrays during construction. These arrays are iterated through during
/// calls to Decompose to do the actual decomposition.
/// The values of the Jacobian are assigned to the combined LU matrix only where the
/// Jacobian itself is non-zero. However, the sparsity pattern of the Jacobian does not
/// necessarily match that of the LU matrix: fill-in during the decomposition can produce
/// non-zero elements in L and U that are zero in A. Elements of the combined matrix that
/// are zero in the A matrix are expected to be set to zero before the matrix is passed
/// to the decomposition function.
class LuDecompositionDoolittleInPlace
{
protected:
/// Number of elements in the middle (k) loops for lower and upper triangular matrices, respectively,
/// and the index in A.data_ for A[i][i] for each iteration of the outer (i) loop
std::vector<std::tuple<std::size_t, std::size_t, std::size_t>> nik_nki_aii_;
/// Index in A.data_ for A[i][k] for each iteration of the upper middle (k) loop and the
/// number of elements in the inner (j) loop for each upper (k) element used to set A[i][k]
std::vector<std::pair<std::size_t, std::size_t>> aik_njk_;
/// Index in A.data_ for A[i][j] and A[j][k] for each iteration of the upper inner (j) loop
std::vector<std::pair<std::size_t, std::size_t>> aij_ajk_;
/// Index in A.data_ for A[k][i] for each iteration of the lower middle (k) loop, the
/// number of elements in the inner (j) loop for each lower (k) element used to set A[k][i]
std::vector<std::pair<std::size_t, std::size_t>> aki_nji_;
/// Index in A.data_ for A[k][j] and A[j][i] for each iteration of the lower inner (j) loop
std::vector<std::pair<std::size_t, std::size_t>> akj_aji_;

public:
/// @brief Default constructor
LuDecompositionDoolittleInPlace();

LuDecompositionDoolittleInPlace(const LuDecompositionDoolittleInPlace&) = delete;
LuDecompositionDoolittleInPlace& operator=(const LuDecompositionDoolittleInPlace&) = delete;

LuDecompositionDoolittleInPlace(LuDecompositionDoolittleInPlace&& other) = default;
LuDecompositionDoolittleInPlace& operator=(LuDecompositionDoolittleInPlace&&) = default;

/// @brief Construct an LU decomposition algorithm for a given sparse matrix
/// @param matrix Sparse matrix
template<class SparseMatrixPolicy>
requires(SparseMatrixConcept<SparseMatrixPolicy>)
LuDecompositionDoolittleInPlace(const SparseMatrixPolicy& matrix);

~LuDecompositionDoolittleInPlace() = default;

/// @brief Create an LU decomposition algorithm for a given sparse matrix policy
/// @param matrix Sparse matrix
template<class SparseMatrixPolicy>
requires(SparseMatrixConcept<SparseMatrixPolicy>)
static LuDecompositionDoolittleInPlace Create(const SparseMatrixPolicy& matrix);

/// @brief Create the combined sparse LU matrix for a given A matrix
/// @param A Sparse matrix that will be decomposed
/// @param initial_value Value used to initialize each non-zero element
/// @return Combined sparse matrix holding the non-zero elements of both L and U, including fill-in
template<class SparseMatrixPolicy>
requires(SparseMatrixConcept<SparseMatrixPolicy>)
static SparseMatrixPolicy GetLUMatrix(
const SparseMatrixPolicy& A,
typename SparseMatrixPolicy::value_type initial_value);

/// @brief Perform an in-place LU decomposition of a combined sparse matrix
/// @param ALU Combined sparse matrix holding the values of A on entry and the
///        L and U factors of the decomposition on return
template<class SparseMatrixPolicy>
requires(!VectorizableSparse<SparseMatrixPolicy>)
void Decompose(SparseMatrixPolicy& ALU) const;
template<class SparseMatrixPolicy>
requires(VectorizableSparse<SparseMatrixPolicy>)
void Decompose(SparseMatrixPolicy& ALU) const;

protected:
/// @brief Initialize the index arrays used by the LU decomposition
/// @param matrix Sparse matrix to decompose
/// @param initial_value Value used to initialize the non-zero elements of the combined matrix
template<class SparseMatrixPolicy>
requires(SparseMatrixConcept<SparseMatrixPolicy>)
void Initialize(const SparseMatrixPolicy& matrix, auto initial_value);
};

} // namespace micm

#include "lu_decomposition_doolittle_in_place.inl"
220 changes: 220 additions & 0 deletions include/micm/solver/lu_decomposition_doolittle_in_place.inl
@@ -0,0 +1,220 @@
// Copyright (C) 2023-2024 National Center for Atmospheric Research
// SPDX-License-Identifier: Apache-2.0

namespace micm
{

inline LuDecompositionDoolittleInPlace::LuDecompositionDoolittleInPlace()
{
}

template<class SparseMatrixPolicy>
requires(SparseMatrixConcept<SparseMatrixPolicy>)
inline LuDecompositionDoolittleInPlace::LuDecompositionDoolittleInPlace(const SparseMatrixPolicy& matrix)
{
Initialize<SparseMatrixPolicy>(matrix, typename SparseMatrixPolicy::value_type());
}

template<class SparseMatrixPolicy>
requires(SparseMatrixConcept<SparseMatrixPolicy>)
inline LuDecompositionDoolittleInPlace LuDecompositionDoolittleInPlace::Create(const SparseMatrixPolicy& matrix)
{
LuDecompositionDoolittleInPlace lu_decomp{};
lu_decomp.Initialize<SparseMatrixPolicy>(matrix, typename SparseMatrixPolicy::value_type());
return lu_decomp;
}

template<class SparseMatrixPolicy>
requires(SparseMatrixConcept<SparseMatrixPolicy>)
inline void LuDecompositionDoolittleInPlace::Initialize(const SparseMatrixPolicy& matrix, auto initial_value)
{
MICM_PROFILE_FUNCTION();

std::size_t n = matrix.NumRows();
auto ALU = GetLUMatrix<SparseMatrixPolicy>(matrix, initial_value);
for (std::size_t i = 0; i < n; ++i)
{
if (ALU.IsZero(i, i))
{
throw std::runtime_error("Diagonal element is zero in LU decomposition");
}
std::tuple<std::size_t, std::size_t, std::size_t> nik_nki_aii(0, 0, ALU.VectorIndex(0, i, i));
for (std::size_t k = i; k < n; ++k)
{
if (ALU.IsZero(i, k))
continue;
std::pair<std::size_t, std::size_t> aik_njk(ALU.VectorIndex(0, i, k), 0);
for (std::size_t j = 0; j < i; ++j)
{
if (ALU.IsZero(i, j) || ALU.IsZero(j, k))
continue;
aij_ajk_.push_back(std::make_pair(ALU.VectorIndex(0, i, j), ALU.VectorIndex(0, j, k)));
++(aik_njk.second);
}
aik_njk_.push_back(aik_njk);
++(std::get<0>(nik_nki_aii));
}
for (std::size_t k = i + 1; k < n; ++k)
{
if (ALU.IsZero(k, i))
continue;
std::pair<std::size_t, std::size_t> aki_nji(ALU.VectorIndex(0, k, i), 0);
for (std::size_t j = 0; j < i; ++j)
{
if (ALU.IsZero(k, j) || ALU.IsZero(j, i))
continue;
akj_aji_.push_back(std::make_pair(ALU.VectorIndex(0, k, j), ALU.VectorIndex(0, j, i)));
++(aki_nji.second);
}
aki_nji_.push_back(aki_nji);
++(std::get<1>(nik_nki_aii));
}
nik_nki_aii_.push_back(nik_nki_aii);
}
}

template<class SparseMatrixPolicy>
requires(SparseMatrixConcept<SparseMatrixPolicy>)
inline SparseMatrixPolicy LuDecompositionDoolittleInPlace::GetLUMatrix(
const SparseMatrixPolicy& A,
typename SparseMatrixPolicy::value_type initial_value)
{
MICM_PROFILE_FUNCTION();

std::size_t n = A.NumRows();
std::set<std::pair<std::size_t, std::size_t>> ALU_ids;
for (std::size_t i = 0; i < n; ++i)
{
// Upper triangular matrix
for (std::size_t k = i; k < n; ++k)
{
if (!A.IsZero(i, k) || k == i)
{
ALU_ids.insert(std::make_pair(i, k));
continue;
}
for (std::size_t j = 0; j < i; ++j)
{
if (ALU_ids.find(std::make_pair(i, j)) != ALU_ids.end() && ALU_ids.find(std::make_pair(j, k)) != ALU_ids.end())
{
ALU_ids.insert(std::make_pair(i, k));
break;
}
}
}
// Lower triangular matrix
for (std::size_t k = i; k < n; ++k)
{
if (!A.IsZero(k, i) || k == i)
{
ALU_ids.insert(std::make_pair(k, i));
continue;
}
for (std::size_t j = 0; j < i; ++j)
{
if (ALU_ids.find(std::make_pair(k, j)) != ALU_ids.end() && ALU_ids.find(std::make_pair(j, i)) != ALU_ids.end())
{
ALU_ids.insert(std::make_pair(k, i));
break;
}
}
}
}
auto ALU_builder = SparseMatrixPolicy::Create(n).SetNumberOfBlocks(A.NumberOfBlocks()).InitialValue(initial_value);
for (auto& pair : ALU_ids)
{
ALU_builder = ALU_builder.WithElement(pair.first, pair.second);
}
return SparseMatrixPolicy(ALU_builder);
}

template<class SparseMatrixPolicy>
requires(!VectorizableSparse<SparseMatrixPolicy>)
inline void LuDecompositionDoolittleInPlace::Decompose(SparseMatrixPolicy& ALU) const
{
MICM_PROFILE_FUNCTION();
const std::size_t n = ALU.NumRows();

// Loop over blocks
for (std::size_t i_block = 0; i_block < ALU.NumberOfBlocks(); ++i_block)
{
auto ALU_vector = std::next(ALU.AsVector().begin(), i_block * ALU.FlatBlockSize());
auto aik_njk = aik_njk_.begin();
auto aij_ajk = aij_ajk_.begin();
auto aki_nji = aki_nji_.begin();
auto akj_aji = akj_aji_.begin();

for (const auto& nik_nki_aii : nik_nki_aii_)
{
for (std::size_t ik = 0; ik < std::get<0>(nik_nki_aii); ++ik)
{
for (std::size_t jk = 0; jk < aik_njk->second; ++jk)
{
ALU_vector[aik_njk->first] -= ALU_vector[aij_ajk->first] * ALU_vector[aij_ajk->second];
++aij_ajk;
}
++aik_njk;
}
for (std::size_t ki = 0; ki < std::get<1>(nik_nki_aii); ++ki)
{
for (std::size_t ji = 0; ji < aki_nji->second; ++ji)
{
ALU_vector[aki_nji->first] -= ALU_vector[akj_aji->first] * ALU_vector[akj_aji->second];
++akj_aji;
}
ALU_vector[aki_nji->first] /= ALU_vector[std::get<2>(nik_nki_aii)];
++aki_nji;
}
}
}
}

template<class SparseMatrixPolicy>
requires(VectorizableSparse<SparseMatrixPolicy>)
inline void LuDecompositionDoolittleInPlace::Decompose(SparseMatrixPolicy& ALU) const
{
MICM_PROFILE_FUNCTION();

const std::size_t n = ALU.NumRows();
const std::size_t ALU_BlockSize = ALU.NumberOfBlocks();
constexpr std::size_t ALU_GroupVectorSize = SparseMatrixPolicy::GroupVectorSize();
const std::size_t ALU_GroupSizeOfFlatBlockSize = ALU.GroupSize();

// Loop over groups of blocks
for (std::size_t i_group = 0; i_group < ALU.NumberOfGroups(ALU_BlockSize); ++i_group)
{
auto ALU_vector = std::next(ALU.AsVector().begin(), i_group * ALU_GroupSizeOfFlatBlockSize);
const std::size_t n_cells = std::min(ALU_GroupVectorSize, ALU_BlockSize - i_group * ALU_GroupVectorSize);
auto aik_njk = aik_njk_.begin();
auto aij_ajk = aij_ajk_.begin();
auto aki_nji = aki_nji_.begin();
auto akj_aji = akj_aji_.begin();
for (const auto& nik_nki_aii : nik_nki_aii_)
{
for (std::size_t ik = 0; ik < std::get<0>(nik_nki_aii); ++ik)
{
for (std::size_t jk = 0; jk < aik_njk->second; ++jk)
{
for (std::size_t i = 0; i < n_cells; ++i)
ALU_vector[aik_njk->first + i] -= ALU_vector[aij_ajk->first + i] * ALU_vector[aij_ajk->second + i];
++aij_ajk;
}
++aik_njk;
}
for (std::size_t ki = 0; ki < std::get<1>(nik_nki_aii); ++ki)
{
for (std::size_t ji = 0; ji < aki_nji->second; ++ji)
{
for (std::size_t i = 0; i < n_cells; ++i)
ALU_vector[aki_nji->first + i] -= ALU_vector[akj_aji->first + i] * ALU_vector[akj_aji->second + i];
++akj_aji;
}
for (std::size_t i = 0; i < n_cells; ++i)
ALU_vector[aki_nji->first + i] /= ALU_vector[std::get<2>(nik_nki_aii) + i];
++aki_nji;
}
}
}
}

} // namespace micm
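
Taken together, the declaration and implementation above are used roughly as follows. This is a sketch assuming the standard sparse matrix ordering and the builder interface visible in GetLUMatrix; the matrix pattern, element values, and fill step are placeholders:

// Build a small sparse matrix with a known pattern.
auto builder = micm::SparseMatrix<double>::Create(3)
                   .WithElement(0, 0).WithElement(0, 1)
                   .WithElement(1, 1)
                   .WithElement(2, 0).WithElement(2, 2);
micm::SparseMatrix<double> A(builder);
// ... assign Jacobian values to the non-zero elements of A ...

// The decomposer records index arrays for A's sparsity pattern, and GetLUMatrix
// returns a combined matrix whose pattern is the union of A's pattern and any
// fill-in introduced by the decomposition.
micm::LuDecompositionDoolittleInPlace lud(A);
auto ALU = micm::LuDecompositionDoolittleInPlace::GetLUMatrix(A, 0.0);
// ... copy A's values into ALU; elements not present in A stay zero ...

// L and U overwrite ALU in place.
lud.Decompose(ALU);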
22 changes: 22 additions & 0 deletions include/micm/solver/solver_builder.hpp
@@ -42,6 +42,7 @@
using DenseMatrixPolicyType = DenseMatrixPolicy;
using SparseMatrixPolicyType = SparseMatrixPolicy;
using LuDecompositionPolicyType = LuDecompositionPolicy;
using LinearSolverPolicyType = LinearSolverPolicy;
using StatePolicyType = StatePolicy;

protected:
@@ -117,6 +118,8 @@
/// @tparam DenseMatrixPolicy Policy for dense matrices
/// @tparam SparseMatrixPolicy Policy for sparse matrices
/// @tparam LuDecompositionPolicy Policy for the LU decomposition
/// @tparam LMatrixPolicy Policy for the Lower matrix
/// @tparam UMatrixPolicy Policy for the Upper matrix
template<
class SolverParametersPolicy,
class DenseMatrixPolicy = Matrix<double>,
@@ -133,6 +136,25 @@
LinearSolver<SparseMatrixPolicy, LuDecompositionPolicy, LMatrixPolicy, UMatrixPolicy>,
State<DenseMatrixPolicy, SparseMatrixPolicy, LuDecompositionPolicy, LMatrixPolicy, UMatrixPolicy>>;

/// @brief Builder of CPU-based general solvers with in-place LU decomposition
/// @tparam SolverParametersPolicy Parameters for the ODE solver
/// @tparam DenseMatrixPolicy Policy for dense matrices
/// @tparam SparseMatrixPolicy Policy for sparse matrices
/// @tparam LuDecompositionPolicy Policy for the LU decomposition
template<
class SolverParametersPolicy,
class DenseMatrix = Matrix<double>,
class SparseMatrixPolicy = SparseMatrix<double, SparseMatrixStandardOrdering>,
class LuDecompositionPolicy = LuDecompositionInPlace>
using CpuSolverBuilderInPlace = SolverBuilder<
SolverParametersPolicy,
DenseMatrix,
SparseMatrixPolicy,
ProcessSet,
LuDecompositionPolicy,
LinearSolverInPlace<SparseMatrixPolicy, LuDecompositionPolicy>,
State<DenseMatrix, SparseMatrixPolicy, LuDecompositionPolicy>>;

} // namespace micm

#include "solver_builder.inl"