-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
1cbd28e
commit 70a2956
Showing
8 changed files
with
600 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
// Copyright (C) 2023-2024 National Center for Atmospheric Research | ||
// SPDX-License-Identifier: Apache-2.0 | ||
#pragma once | ||
|
||
#include <micm/profiler/instrumentation.hpp> | ||
#include <micm/util/sparse_matrix.hpp> | ||
|
||
namespace micm | ||
{ | ||
|
||
/// @brief LU decomposer for SparseMatrix following the algorithm from the MOZART model | ||
/// | ||
/// This LU decomposition uses the algorithm from the MOZART chemistry preprocessor at: | ||
/// https://github.com/ESCOMP/CHEM_PREPROCESSOR/blob/f923081508f4264e61fcef2753a9898e55d1598e/src/cam_chempp/make_lu_fac.f#L127-L214 | ||
/// | ||
/// The MOZART function overwrote the A matrix with the L and U matrices. The pseudo-code | ||
/// in C++ for the corresponding dense matrix algorithm for matrix A (inline change) would be: | ||
/// | ||
/// for i = 0...n-1 // Outer loop over columns of the sparse matrix A | ||
/// for j = i+1...n-1 // Multiply column below diagonal | ||
/// A[j][i] = A[j][i] / A[i][i] | ||
/// for k = i+1...n-1 // Modify sub-matrix | ||
/// for j = i+1...n-1 | ||
/// A[j][k] = A[j][k] – A[j][i] * A[i][k] | ||
/// | ||
/// For the sparse matrix algorithm, the indices of non-zero terms are stored in | ||
/// several arrays during construction. These arrays are iterated through during | ||
/// calls to Decompose to do the actual decomposition. | ||
/// | ||
/// The GetLUMatrices function creates a new sparse matrix that includes the superset | ||
/// of the non-zero elements in the L and U matrices. It is expected that the elements | ||
/// of the L and U matrices that are zero in the A matrix will be set to zero before the | ||
/// combined matrix is passed to the decomposition function. | ||
class LuDecompositionMozartInPlace | ||
{ | ||
protected: | ||
/// Index in A.data_ for all diagonal elements, the number of iterations of the inner (j) loop | ||
/// for each (i) used to set A[j][i], and the number of iterations of the middle (k) loop for | ||
/// each (i) used to set A[j][k] | ||
std::vector<std::tuple<std::size_t, std::size_t, std::size_t>> aii_nji_nki_; | ||
/// Index in A.data_ for A[j][i] for each iteration of the inner (j) loop | ||
/// used to set the value of A[j][i] | ||
std::vector<std::size_t> aji_; | ||
/// Index in A.data_ for A[i][k] for each iteration of the middle (k) loop, | ||
/// and the number of iterations of the inner (j) loop for each (k) used to set A[j][k] | ||
std::vector<std::pair<std::size_t, std::size_t>> aik_njk_; | ||
/// Index in A.data_ for A[j][k] and A[j][i] for each iteration of the inner (j) loop | ||
std::vector<std::pair<std::size_t, std::size_t>> ajk_aji_; | ||
|
||
public: | ||
/// @brief default constructor | ||
LuDecompositionMozartInPlace(); | ||
|
||
LuDecompositionMozartInPlace(const LuDecompositionMozartInPlace&) = delete; | ||
LuDecompositionMozartInPlace& operator=(const LuDecompositionMozartInPlace&) = delete; | ||
|
||
LuDecompositionMozartInPlace(LuDecompositionMozartInPlace&& other) = default; | ||
LuDecompositionMozartInPlace& operator=(LuDecompositionMozartInPlace&&) = default; | ||
|
||
/// @brief Construct an LU decomposition algorithm for a given sparse matrix | ||
/// @param matrix Sparse matrix | ||
template<class SparseMatrixPolicy> | ||
requires(SparseMatrixConcept<SparseMatrixPolicy>) LuDecompositionMozartInPlace(const SparseMatrixPolicy& matrix); | ||
|
||
~LuDecompositionMozartInPlace() = default; | ||
|
||
/// @brief Create an LU decomposition algorithm for a given sparse matrix policy | ||
/// @param matrix Sparse matrix | ||
template<class SparseMatrixPolicy> | ||
requires(SparseMatrixConcept<SparseMatrixPolicy>) static LuDecompositionMozartInPlace Create(const SparseMatrixPolicy& matrix); | ||
|
||
/// @brief Create a combined sparse L and U matrix for a given A matrix | ||
/// @param A Sparse matrix that will be decomposed | ||
/// @return combined L and U Sparse matrices | ||
template<class SparseMatrixPolicy> | ||
requires(SparseMatrixConcept<SparseMatrixPolicy>) static SparseMatrixPolicy GetLUMatrix( | ||
const SparseMatrixPolicy& A, | ||
typename SparseMatrixPolicy::value_type initial_value); | ||
|
||
/// @brief Perform an LU decomposition on a given A matrix. | ||
/// All elements of L and U that are zero in A should be set to zero before calling this function. | ||
/// @param ALU Sparse matrix to decompose (will be overwritten with L and U matrices) | ||
template<class SparseMatrixPolicy> | ||
requires(!VectorizableSparse<SparseMatrixPolicy>) void Decompose( | ||
SparseMatrixPolicy& ALU) const; | ||
template<class SparseMatrixPolicy> | ||
requires(VectorizableSparse<SparseMatrixPolicy>) void Decompose( | ||
SparseMatrixPolicy& ALU) const; | ||
|
||
private: | ||
/// @brief Initialize arrays for the LU decomposition | ||
/// @param A Sparse matrix to decompose | ||
template<class SparseMatrixPolicy> | ||
requires(SparseMatrixConcept<SparseMatrixPolicy>) void Initialize(const SparseMatrixPolicy& matrix, auto initial_value); | ||
}; | ||
|
||
} // namespace micm | ||
|
||
#include "lu_decomposition_mozart_in_place.inl" |
183 changes: 183 additions & 0 deletions
183
include/micm/solver/lu_decomposition_mozart_in_place.inl
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,183 @@ | ||
// Copyright (C) 2023-2024 National Center for Atmospheric Research | ||
// SPDX-License-Identifier: Apache-2.0 | ||
|
||
namespace micm | ||
{ | ||
|
||
inline LuDecompositionMozartInPlace::LuDecompositionMozartInPlace() | ||
{ | ||
} | ||
|
||
template<class SparseMatrixPolicy> | ||
requires(SparseMatrixConcept<SparseMatrixPolicy>) inline LuDecompositionMozartInPlace::LuDecompositionMozartInPlace(const SparseMatrixPolicy& matrix) | ||
{ | ||
Initialize<SparseMatrixPolicy>(matrix, typename SparseMatrixPolicy::value_type()); | ||
} | ||
|
||
template<class SparseMatrixPolicy> | ||
requires(SparseMatrixConcept<SparseMatrixPolicy>) inline LuDecompositionMozartInPlace LuDecompositionMozartInPlace::Create( | ||
const SparseMatrixPolicy& matrix) | ||
{ | ||
LuDecompositionMozartInPlace lu_decomp{}; | ||
lu_decomp.Initialize<SparseMatrixPolicy>(matrix, typename SparseMatrixPolicy::value_type()); | ||
return lu_decomp; | ||
} | ||
|
||
template<class SparseMatrixPolicy> | ||
requires(SparseMatrixConcept<SparseMatrixPolicy>) inline void LuDecompositionMozartInPlace::Initialize( | ||
const SparseMatrixPolicy& matrix, | ||
auto initial_value) | ||
{ | ||
MICM_PROFILE_FUNCTION(); | ||
|
||
std::size_t n = matrix.NumRows(); | ||
auto ALU = GetLUMatrix<SparseMatrixPolicy>(matrix, initial_value); | ||
for (std::size_t i = 0; i < n; ++i) | ||
{ | ||
if (ALU.IsZero(i, i)) { | ||
throw std::runtime_error("Diagonal element is zero in LU decomposition"); | ||
} | ||
std::tuple<std::size_t, std::size_t, std::size_t> aii_nji_nki(ALU.VectorIndex(0, i, i), 0, 0); | ||
for (std::size_t j = i + 1; j < n; ++j) | ||
{ | ||
if (ALU.IsZero(j, i)) | ||
continue; | ||
aji_.push_back(ALU.VectorIndex(0, j, i)); | ||
++(std::get<1>(aii_nji_nki)); | ||
} | ||
for (std::size_t k = i + 1; k < n; ++k) | ||
{ | ||
if (ALU.IsZero(i, k)) | ||
continue; | ||
std::pair<std::size_t, std::size_t> aik_njk(ALU.VectorIndex(0, i, k), 0); | ||
for (std::size_t j = i + 1; j < n; ++j) | ||
{ | ||
if (ALU.IsZero(j, i)) | ||
continue; | ||
std::pair<std::size_t, std::size_t> ajk_aji(ALU.VectorIndex(0, j, k), ALU.VectorIndex(0, j, i)); | ||
ajk_aji_.push_back(ajk_aji); | ||
++(std::get<1>(aik_njk)); | ||
} | ||
aik_njk_.push_back(aik_njk); | ||
++(std::get<2>(aii_nji_nki)); | ||
} | ||
aii_nji_nki_.push_back(aii_nji_nki); | ||
} | ||
} | ||
|
||
template<class SparseMatrixPolicy> | ||
requires( | ||
SparseMatrixConcept<SparseMatrixPolicy>) inline SparseMatrixPolicy LuDecompositionMozartInPlace:: | ||
GetLUMatrix(const SparseMatrixPolicy& A, typename SparseMatrixPolicy::value_type initial_value) | ||
{ | ||
MICM_PROFILE_FUNCTION(); | ||
|
||
std::size_t n = A.NumRows(); | ||
std::set<std::pair<std::size_t, std::size_t>> ALU_ids; | ||
for (std::size_t i = 0; i < n; ++i) | ||
{ | ||
for (std::size_t j = 0; j < n; ++j) | ||
if (!A.IsZero(i, j)) | ||
ALU_ids.insert(std::make_pair(i, j)); | ||
} | ||
for (std::size_t i = 0; i < n; ++i) | ||
{ | ||
for (std::size_t j = i + 1; j < n; ++j) | ||
if (std::find(ALU_ids.begin(), ALU_ids.end(), std::make_pair(j, i)) != ALU_ids.end()) | ||
ALU_ids.insert(std::make_pair(j, i)); | ||
for (std::size_t k = i + 1; k < n; ++k) | ||
if (std::find(ALU_ids.begin(), ALU_ids.end(), std::make_pair(i, k)) != ALU_ids.end()) | ||
for (std::size_t j = i + 1; j < n; ++j) | ||
if (std::find(ALU_ids.begin(), ALU_ids.end(), std::make_pair(j, i)) != ALU_ids.end()) | ||
ALU_ids.insert(std::make_pair(j, k)); | ||
} | ||
auto ALU_builder = SparseMatrixPolicy::Create(n).SetNumberOfBlocks(A.NumberOfBlocks()).InitialValue(initial_value); | ||
for (auto& pair : ALU_ids) | ||
{ | ||
ALU_builder = ALU_builder.WithElement(pair.first, pair.second); | ||
} | ||
return SparseMatrixPolicy(ALU_builder); | ||
} | ||
|
||
template<class SparseMatrixPolicy> | ||
requires(!VectorizableSparse<SparseMatrixPolicy>) inline void LuDecompositionMozartInPlace::Decompose( | ||
SparseMatrixPolicy& ALU) const | ||
{ | ||
MICM_PROFILE_FUNCTION(); | ||
const std::size_t n = ALU.NumRows(); | ||
|
||
// Loop over blocks | ||
for (std::size_t i_block = 0; i_block < ALU.NumberOfBlocks(); ++i_block) | ||
{ | ||
auto ALU_vector = std::next(ALU.AsVector().begin(), i_block * ALU.FlatBlockSize()); | ||
auto aji = aji_.begin(); | ||
auto aik_njk = aik_njk_.begin(); | ||
auto ajk_aji = ajk_aji_.begin(); | ||
|
||
for (const auto& aii_nji_nki : aii_nji_nki_) | ||
{ | ||
const typename SparseMatrixPolicy::value_type Aii_inverse = 1.0 / ALU_vector[std::get<0>(aii_nji_nki)]; | ||
for (std::size_t ij = 0; ij < std::get<1>(aii_nji_nki); ++ij) | ||
{ | ||
ALU_vector[*aji] *= Aii_inverse; | ||
++aji; | ||
} | ||
for (std::size_t ik = 0; ik < std::get<2>(aii_nji_nki); ++ik) | ||
{ | ||
const typename SparseMatrixPolicy::value_type Aik = ALU_vector[std::get<0>(*aik_njk)]; | ||
for (std::size_t ijk = 0; ijk < std::get<1>(*aik_njk); ++ijk) | ||
{ | ||
ALU_vector[ajk_aji->first] -= ALU_vector[ajk_aji->second] * Aik; | ||
++ajk_aji; | ||
} | ||
++aik_njk; | ||
} | ||
} | ||
} | ||
} | ||
|
||
template<class SparseMatrixPolicy> | ||
requires(VectorizableSparse<SparseMatrixPolicy>) inline void LuDecompositionMozartInPlace::Decompose( | ||
SparseMatrixPolicy& ALU) const | ||
{ | ||
MICM_PROFILE_FUNCTION(); | ||
|
||
const std::size_t n = ALU.NumRows(); | ||
const std::size_t ALU_BlockSize = ALU.NumberOfBlocks(); | ||
constexpr std::size_t ALU_GroupVectorSize = SparseMatrixPolicy::GroupVectorSize(); | ||
const std::size_t ALU_GroupSizeOfFlatBlockSize = ALU.GroupSize(); | ||
double Aii_inverse[ALU_GroupVectorSize]; | ||
|
||
// Loop over groups of blocks | ||
for (std::size_t i_group = 0; i_group < ALU.NumberOfGroups(ALU_BlockSize); ++i_group) | ||
{ | ||
auto ALU_vector = std::next(ALU.AsVector().begin(), i_group * ALU_GroupSizeOfFlatBlockSize); | ||
const std::size_t n_cells = std::min(ALU_GroupVectorSize, ALU_BlockSize - i_group * ALU_GroupVectorSize); | ||
auto aji = aji_.begin(); | ||
auto aik_njk = aik_njk_.begin(); | ||
auto ajk_aji = ajk_aji_.begin(); | ||
for (const auto& aii_nji_nki : aii_nji_nki_) | ||
{ | ||
for (std::size_t i = 0; i < n_cells; ++i) | ||
Aii_inverse[i] = 1.0 / ALU_vector[std::get<0>(aii_nji_nki) + i]; | ||
for (std::size_t ij = 0; ij < std::get<1>(aii_nji_nki); ++ij) | ||
{ | ||
for (std::size_t i = 0; i < n_cells; ++i) | ||
ALU_vector[*aji + i] *= Aii_inverse[i]; | ||
++aji; | ||
} | ||
for (std::size_t ik = 0; ik < std::get<2>(aii_nji_nki); ++ik) | ||
{ | ||
const std::size_t aik = std::get<0>(*aik_njk); | ||
for (std::size_t ijk = 0; ijk < std::get<1>(*aik_njk); ++ijk) | ||
{ | ||
for (std::size_t i = 0; i < n_cells; ++i) | ||
ALU_vector[ajk_aji->first + i] -= ALU_vector[ajk_aji->second + i] * ALU_vector[aik + i]; | ||
++ajk_aji; | ||
} | ||
++aik_njk; | ||
} | ||
} | ||
} | ||
} | ||
} // namespace micm |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.