-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
7 changed files
with
452 additions
and
46 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
106 changes: 106 additions & 0 deletions
106
include/micm/solver/lu_decomposition_doolittle_in_place.hpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,106 @@ | ||
// Copyright (C) 2023-2024 National Center for Atmospheric Research | ||
// SPDX-License-Identifier: Apache-2.0 | ||
#pragma once | ||
|
||
#include <micm/profiler/instrumentation.hpp> | ||
#include <micm/util/sparse_matrix.hpp> | ||
|
||
namespace micm | ||
{ | ||
|
||
/// @brief LU decomposer for SparseMatrix following the Doolittle algorithm | ||
/// | ||
/// The LU decomposition uses the Doolittle algorithm following the | ||
/// naming used here: https://www.geeksforgeeks.org/doolittle-algorithm-lu-decomposition/ | ||
/// | ||
/// The sudo-code for the corresponding dense matrix algorithm for matrix A (in-line) would be: | ||
/// | ||
/// for i = 0...n-1 // Outer loop over rows (columns) for upper (lower) triangular matrix | ||
/// for k = i...n-1 // Middle loop over columns for upper triangular matrix | ||
/// for j = 0...i-1 // Inner loop over columns (rows) for lower (upper) triangular matrix | ||
/// A[i][k] -= A[i][j] * A[j][k] | ||
/// for k = i+1...n-1 // Middle loop over rows for lower triangular matrix | ||
/// for j = 0...i-1 // Inner loop over columns (rows) for lower (upper) triangular matrix | ||
/// A[k][i] -= A[k][j] * A[j][i]; | ||
/// A[k][i] /= A[i][i] | ||
/// | ||
/// For the sparse matrix algorithm, the indices of non-zero terms are stored in | ||
/// several arrays during construction. These arrays are iterated through during | ||
/// calls to Decompose to do the actual decomposition. | ||
/// Our LU Decomposition only assigns the values of the jacobian to the LU matrices | ||
/// when the *jacobian* is nonzero. However, the sparsity pattern of the jacobian doesn't | ||
/// necessarily match that of the LU matrices. There can be more nonzero elements in the LU matrices | ||
/// than in the jacobian. It is expected that the elements of the L and U matrices that are zero in the A matrix | ||
/// will be set to zero before the combined matrix is passed to the decomposition function. | ||
class LuDecompositionDoolittleInPlace | ||
{ | ||
protected: | ||
/// Number of elements in the middle (k) loops for lower and upper triangular matrices, respectively, | ||
/// and the index in A.data_ for A[i][i] for each iteration of the outer (i) loop | ||
std::vector<std::tuple<std::size_t, std::size_t, std::size_t>> nik_nki_aii_; | ||
/// Index in A.data_ for A[i][k] for each iteration of the upper middle (k) loop and the | ||
/// number of elements in the inner (j) loop for each upper (k) element used to set A[i][k] | ||
std::vector<std::pair<std::size_t, std::size_t>> aik_njk_; | ||
/// Index in A.data_ for A[i][j] and A[j][k] for each iteration of the upper inner (j) loop | ||
std::vector<std::pair<std::size_t, std::size_t>> aij_ajk_; | ||
/// Index in A.data_ for A[k][i] for each iteration of the lower middle (k) loop, the | ||
/// number of elements in the inner (j) loop for each lower (k) element used to set A[k][i] | ||
std::vector<std::pair<std::size_t, std::size_t>> aki_nji_; | ||
/// Index in A.data_ for A[k][j] and A[j][i] for each iteration of the lower inner (j) loop | ||
std::vector<std::pair<std::size_t, std::size_t>> akj_aji_; | ||
|
||
public: | ||
/// @brief default constructor | ||
LuDecompositionDoolittleInPlace(); | ||
|
||
LuDecompositionDoolittleInPlace(const LuDecompositionDoolittleInPlace&) = delete; | ||
LuDecompositionDoolittleInPlace& operator=(const LuDecompositionDoolittleInPlace&) = delete; | ||
|
||
LuDecompositionDoolittleInPlace(LuDecompositionDoolittleInPlace&& other) = default; | ||
LuDecompositionDoolittleInPlace& operator=(LuDecompositionDoolittleInPlace&&) = default; | ||
|
||
/// @brief Construct an LU decomposition algorithm for a given sparse matrix | ||
/// @param matrix Sparse matrix | ||
template<class SparseMatrixPolicy> | ||
requires(SparseMatrixConcept<SparseMatrixPolicy>) | ||
LuDecompositionDoolittleInPlace(const SparseMatrixPolicy& matrix); | ||
|
||
~LuDecompositionDoolittleInPlace() = default; | ||
|
||
/// @brief Create an LU decomposition algorithm for a given sparse matrix policy | ||
/// @param matrix Sparse matrix | ||
template<class SparseMatrixPolicy> | ||
requires(SparseMatrixConcept<SparseMatrixPolicy>) | ||
static LuDecompositionDoolittleInPlace Create(const SparseMatrixPolicy& matrix); | ||
|
||
/// @brief Create sparse L and U matrices for a given A matrix | ||
/// @param A Sparse matrix that will be decomposed | ||
/// @return L and U Sparse matrices | ||
template<class SparseMatrixPolicy> | ||
requires(SparseMatrixConcept<SparseMatrixPolicy>) | ||
static SparseMatrixPolicy GetLUMatrix( | ||
const SparseMatrixPolicy& A, | ||
typename SparseMatrixPolicy::value_type initial_value); | ||
|
||
/// @brief Perform an LU decomposition on a given A matrix | ||
/// @param A Sparse matrix to decompose | ||
/// @param L The lower triangular matrix created by decomposition | ||
/// @param U The upper triangular matrix created by decomposition | ||
template<class SparseMatrixPolicy> | ||
requires(!VectorizableSparse<SparseMatrixPolicy>) | ||
void Decompose(SparseMatrixPolicy& ALU) const; | ||
template<class SparseMatrixPolicy> | ||
requires(VectorizableSparse<SparseMatrixPolicy>) | ||
void Decompose(SparseMatrixPolicy& ALU) const; | ||
|
||
protected: | ||
/// @brief Initialize arrays for the LU decomposition | ||
/// @param A Sparse matrix to decompose | ||
template<class SparseMatrixPolicy> | ||
requires(SparseMatrixConcept<SparseMatrixPolicy>) | ||
void Initialize(const SparseMatrixPolicy& matrix, auto initial_value); | ||
}; | ||
|
||
} // namespace micm | ||
|
||
#include "lu_decomposition_doolittle_in_place.inl" |
220 changes: 220 additions & 0 deletions
220
include/micm/solver/lu_decomposition_doolittle_in_place.inl
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,220 @@ | ||
// Copyright (C) 2023-2024 National Center for Atmospheric Research | ||
// SPDX-License-Identifier: Apache-2.0 | ||
|
||
namespace micm | ||
{ | ||
|
||
inline LuDecompositionDoolittleInPlace::LuDecompositionDoolittleInPlace() | ||
{ | ||
} | ||
|
||
template<class SparseMatrixPolicy> | ||
requires(SparseMatrixConcept<SparseMatrixPolicy>) | ||
inline LuDecompositionDoolittleInPlace::LuDecompositionDoolittleInPlace(const SparseMatrixPolicy& matrix) | ||
{ | ||
Initialize<SparseMatrixPolicy>(matrix, typename SparseMatrixPolicy::value_type()); | ||
} | ||
|
||
template<class SparseMatrixPolicy> | ||
requires(SparseMatrixConcept<SparseMatrixPolicy>) | ||
inline LuDecompositionDoolittleInPlace LuDecompositionDoolittleInPlace::Create(const SparseMatrixPolicy& matrix) | ||
{ | ||
LuDecompositionDoolittleInPlace lu_decomp{}; | ||
lu_decomp.Initialize<SparseMatrixPolicy>(matrix, typename SparseMatrixPolicy::value_type()); | ||
return lu_decomp; | ||
} | ||
|
||
template<class SparseMatrixPolicy> | ||
requires(SparseMatrixConcept<SparseMatrixPolicy>) | ||
inline void LuDecompositionDoolittleInPlace::Initialize(const SparseMatrixPolicy& matrix, auto initial_value) | ||
{ | ||
MICM_PROFILE_FUNCTION(); | ||
|
||
std::size_t n = matrix.NumRows(); | ||
auto ALU = GetLUMatrix<SparseMatrixPolicy>(matrix, initial_value); | ||
for (std::size_t i = 0; i < n; ++i) | ||
{ | ||
if (ALU.IsZero(i, i)) | ||
{ | ||
throw std::runtime_error("Diagonal element is zero in LU decomposition"); | ||
} | ||
std::tuple<std::size_t, std::size_t, std::size_t> nik_nki_aii(0, 0, ALU.VectorIndex(0, i, i)); | ||
for (std::size_t k = i; k < n; ++k) | ||
{ | ||
if (ALU.IsZero(i, k)) | ||
continue; | ||
std::pair<std::size_t, std::size_t> aik_njk(ALU.VectorIndex(0, i, k), 0); | ||
for (std::size_t j = 0; j < i; ++j) | ||
{ | ||
if (ALU.IsZero(i, j) || ALU.IsZero(j, k)) | ||
continue; | ||
aij_ajk_.push_back(std::make_pair(ALU.VectorIndex(0, i, j), ALU.VectorIndex(0, j, k))); | ||
++(aik_njk.second); | ||
} | ||
aik_njk_.push_back(aik_njk); | ||
++(std::get<0>(nik_nki_aii)); | ||
} | ||
for (std::size_t k = i + 1; k < n; ++k) | ||
{ | ||
if (ALU.IsZero(k, i)) | ||
continue; | ||
std::pair<std::size_t, std::size_t> aki_nji(ALU.VectorIndex(0, k, i), 0); | ||
for (std::size_t j = 0; j < i; ++j) | ||
{ | ||
if (ALU.IsZero(k, j) || ALU.IsZero(j, i)) | ||
continue; | ||
akj_aji_.push_back(std::make_pair(ALU.VectorIndex(0, k, j), ALU.VectorIndex(0, j, i))); | ||
++(aki_nji.second); | ||
} | ||
aki_nji_.push_back(aki_nji); | ||
++(std::get<1>(nik_nki_aii)); | ||
} | ||
nik_nki_aii_.push_back(nik_nki_aii); | ||
} | ||
} | ||
|
||
template<class SparseMatrixPolicy> | ||
requires(SparseMatrixConcept<SparseMatrixPolicy>) | ||
inline SparseMatrixPolicy LuDecompositionDoolittleInPlace::GetLUMatrix( | ||
const SparseMatrixPolicy& A, | ||
typename SparseMatrixPolicy::value_type initial_value) | ||
{ | ||
MICM_PROFILE_FUNCTION(); | ||
|
||
std::size_t n = A.NumRows(); | ||
std::set<std::pair<std::size_t, std::size_t>> ALU_ids; | ||
for (std::size_t i = 0; i < n; ++i) | ||
{ | ||
// Upper triangular matrix | ||
for (std::size_t k = i; k < n; ++k) | ||
{ | ||
if (!A.IsZero(i, k) || k == i) | ||
{ | ||
ALU_ids.insert(std::make_pair(i, k)); | ||
continue; | ||
} | ||
for (std::size_t j = 0; j < i; ++j) | ||
{ | ||
if (ALU_ids.find(std::make_pair(i, j)) != ALU_ids.end() && ALU_ids.find(std::make_pair(j, k)) != ALU_ids.end()) | ||
{ | ||
ALU_ids.insert(std::make_pair(i, k)); | ||
break; | ||
} | ||
} | ||
} | ||
// Lower triangular matrix | ||
for (std::size_t k = i; k < n; ++k) | ||
{ | ||
if (!A.IsZero(k, i) || k == i) | ||
{ | ||
ALU_ids.insert(std::make_pair(k, i)); | ||
continue; | ||
} | ||
for (std::size_t j = 0; j < i; ++j) | ||
{ | ||
if (ALU_ids.find(std::make_pair(k, j)) != ALU_ids.end() && ALU_ids.find(std::make_pair(j, i)) != ALU_ids.end()) | ||
{ | ||
ALU_ids.insert(std::make_pair(k, i)); | ||
break; | ||
} | ||
} | ||
} | ||
} | ||
auto ALU_builder = SparseMatrixPolicy::Create(n).SetNumberOfBlocks(A.NumberOfBlocks()).InitialValue(initial_value); | ||
for (auto& pair : ALU_ids) | ||
{ | ||
ALU_builder = ALU_builder.WithElement(pair.first, pair.second); | ||
} | ||
return SparseMatrixPolicy(ALU_builder); | ||
} | ||
|
||
template<class SparseMatrixPolicy> | ||
requires(!VectorizableSparse<SparseMatrixPolicy>) | ||
inline void LuDecompositionDoolittleInPlace::Decompose(SparseMatrixPolicy& ALU) const | ||
{ | ||
MICM_PROFILE_FUNCTION(); | ||
const std::size_t n = ALU.NumRows(); | ||
|
||
// Loop over blocks | ||
for (std::size_t i_block = 0; i_block < ALU.NumberOfBlocks(); ++i_block) | ||
{ | ||
auto ALU_vector = std::next(ALU.AsVector().begin(), i_block * ALU.FlatBlockSize()); | ||
auto aik_njk = aik_njk_.begin(); | ||
auto aij_ajk = aij_ajk_.begin(); | ||
auto aki_nji = aki_nji_.begin(); | ||
auto akj_aji = akj_aji_.begin(); | ||
|
||
for (const auto& nik_nki_aii : nik_nki_aii_) | ||
{ | ||
for (std::size_t ik = 0; ik < std::get<0>(nik_nki_aii); ++ik) | ||
{ | ||
for (std::size_t jk = 0; jk < aik_njk->second; ++jk) | ||
{ | ||
ALU_vector[aik_njk->first] -= ALU_vector[aij_ajk->first] * ALU_vector[aij_ajk->second]; | ||
++aij_ajk; | ||
} | ||
++aik_njk; | ||
} | ||
for (std::size_t ki = 0; ki < std::get<1>(nik_nki_aii); ++ki) | ||
{ | ||
for (std::size_t ji = 0; ji < aki_nji->second; ++ji) | ||
{ | ||
ALU_vector[aki_nji->first] -= ALU_vector[akj_aji->first] * ALU_vector[akj_aji->second]; | ||
++akj_aji; | ||
} | ||
ALU_vector[aki_nji->first] /= ALU_vector[std::get<2>(nik_nki_aii)]; | ||
++aki_nji; | ||
} | ||
} | ||
} | ||
} | ||
|
||
template<class SparseMatrixPolicy> | ||
requires(VectorizableSparse<SparseMatrixPolicy>) | ||
inline void LuDecompositionDoolittleInPlace::Decompose(SparseMatrixPolicy& ALU) const | ||
{ | ||
MICM_PROFILE_FUNCTION(); | ||
|
||
const std::size_t n = ALU.NumRows(); | ||
const std::size_t ALU_BlockSize = ALU.NumberOfBlocks(); | ||
constexpr std::size_t ALU_GroupVectorSize = SparseMatrixPolicy::GroupVectorSize(); | ||
const std::size_t ALU_GroupSizeOfFlatBlockSize = ALU.GroupSize(); | ||
|
||
// Loop over groups of blocks | ||
for (std::size_t i_group = 0; i_group < ALU.NumberOfGroups(ALU_BlockSize); ++i_group) | ||
{ | ||
auto ALU_vector = std::next(ALU.AsVector().begin(), i_group * ALU_GroupSizeOfFlatBlockSize); | ||
const std::size_t n_cells = std::min(ALU_GroupVectorSize, ALU_BlockSize - i_group * ALU_GroupVectorSize); | ||
auto aik_njk = aik_njk_.begin(); | ||
auto aij_ajk = aij_ajk_.begin(); | ||
auto aki_nji = aki_nji_.begin(); | ||
auto akj_aji = akj_aji_.begin(); | ||
for (const auto& nik_nki_aii : nik_nki_aii_) | ||
{ | ||
for (std::size_t ik = 0; ik < std::get<0>(nik_nki_aii); ++ik) | ||
{ | ||
for (std::size_t jk = 0; jk < aik_njk->second; ++jk) | ||
{ | ||
for (std::size_t i = 0; i < n_cells; ++i) | ||
ALU_vector[aik_njk->first + i] -= ALU_vector[aij_ajk->first + i] * ALU_vector[aij_ajk->second + i]; | ||
++aij_ajk; | ||
} | ||
++aik_njk; | ||
} | ||
for (std::size_t ki = 0; ki < std::get<1>(nik_nki_aii); ++ki) | ||
{ | ||
for (std::size_t ji = 0; ji < aki_nji->second; ++ji) | ||
{ | ||
for (std::size_t i = 0; i < n_cells; ++i) | ||
ALU_vector[aki_nji->first + i] -= ALU_vector[akj_aji->first + i] * ALU_vector[akj_aji->second + i]; | ||
++akj_aji; | ||
} | ||
for (std::size_t i = 0; i < n_cells; ++i) | ||
ALU_vector[aki_nji->first + i] /= ALU_vector[std::get<2>(nik_nki_aii) + i]; | ||
++aki_nji; | ||
} | ||
} | ||
} | ||
} | ||
|
||
} // namespace micm |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.