Skip to content

Commit

Permalink
Merge branch 'main' into develop-695-in-place-lu
Browse files Browse the repository at this point in the history
  • Loading branch information
mattldawson committed Jan 9, 2025
2 parents b529224 + 19425fb commit 0c93161
Show file tree
Hide file tree
Showing 55 changed files with 714 additions and 345 deletions.
44 changes: 24 additions & 20 deletions include/micm/cuda/process/cuda_process_set.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,25 +52,27 @@ namespace micm
void SetJacobianFlatIds(const SparseMatrix<double, OrderingPolicy>& matrix);

template<typename MatrixPolicy>
requires(CudaMatrix<MatrixPolicy>&& VectorizableDense<MatrixPolicy>) void AddForcingTerms(
const MatrixPolicy& rate_constants,
const MatrixPolicy& state_variables,
MatrixPolicy& forcing) const;
requires(CudaMatrix<MatrixPolicy> && VectorizableDense<MatrixPolicy>)
void AddForcingTerms(const MatrixPolicy& rate_constants, const MatrixPolicy& state_variables, MatrixPolicy& forcing)
const;

template<typename MatrixPolicy>
requires(!CudaMatrix<MatrixPolicy>) void AddForcingTerms(
const MatrixPolicy& rate_constants,
const MatrixPolicy& state_variables,
MatrixPolicy& forcing) const;
requires(!CudaMatrix<MatrixPolicy>)
void AddForcingTerms(const MatrixPolicy& rate_constants, const MatrixPolicy& state_variables, MatrixPolicy& forcing)
const;

template<class MatrixPolicy, class SparseMatrixPolicy>
requires(
CudaMatrix<MatrixPolicy>&& CudaMatrix<SparseMatrixPolicy>&& VectorizableDense<MatrixPolicy>&& VectorizableSparse<
SparseMatrixPolicy>) void SubtractJacobianTerms(const MatrixPolicy& rate_constants, const MatrixPolicy& state_variables, SparseMatrixPolicy& jacobian)
const;
requires(
CudaMatrix<MatrixPolicy> && CudaMatrix<SparseMatrixPolicy> && VectorizableDense<MatrixPolicy> &&
VectorizableSparse<SparseMatrixPolicy>)
void SubtractJacobianTerms(
const MatrixPolicy& rate_constants,
const MatrixPolicy& state_variables,
SparseMatrixPolicy& jacobian) const;

template<class MatrixPolicy, class SparseMatrixPolicy>
requires(!CudaMatrix<MatrixPolicy> && !CudaMatrix<SparseMatrixPolicy>) void SubtractJacobianTerms(
requires(!CudaMatrix<MatrixPolicy> && !CudaMatrix<SparseMatrixPolicy>)
void SubtractJacobianTerms(
const MatrixPolicy& rate_constants,
const MatrixPolicy& state_variables,
SparseMatrixPolicy& jacobian) const;
Expand Down Expand Up @@ -119,7 +121,8 @@ namespace micm
}

template<class MatrixPolicy>
requires(CudaMatrix<MatrixPolicy>&& VectorizableDense<MatrixPolicy>) inline void CudaProcessSet::AddForcingTerms(
requires(CudaMatrix<MatrixPolicy> && VectorizableDense<MatrixPolicy>)
inline void CudaProcessSet::AddForcingTerms(
const MatrixPolicy& rate_constants,
const MatrixPolicy& state_variables,
MatrixPolicy& forcing) const
Expand All @@ -130,12 +133,13 @@ namespace micm
}

template<class MatrixPolicy, class SparseMatrixPolicy>
requires(CudaMatrix<MatrixPolicy>&& CudaMatrix<SparseMatrixPolicy>&& VectorizableDense<MatrixPolicy>&&
VectorizableSparse<SparseMatrixPolicy>) inline void CudaProcessSet::
SubtractJacobianTerms(
const MatrixPolicy& rate_constants,
const MatrixPolicy& state_variables,
SparseMatrixPolicy& jacobian) const
requires(
CudaMatrix<MatrixPolicy> && CudaMatrix<SparseMatrixPolicy> && VectorizableDense<MatrixPolicy> &&
VectorizableSparse<SparseMatrixPolicy>)
inline void CudaProcessSet::SubtractJacobianTerms(
const MatrixPolicy& rate_constants,
const MatrixPolicy& state_variables,
SparseMatrixPolicy& jacobian) const
{
auto jacobian_param =
jacobian.AsDeviceParam(); // we need to update jacobian so it can't be constant and must be an lvalue
Expand Down
7 changes: 4 additions & 3 deletions include/micm/cuda/solver/cuda_linear_solver.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,10 @@ namespace micm
};

template<class MatrixPolicy>
requires(
CudaMatrix<SparseMatrixPolicy>&& CudaMatrix<MatrixPolicy>&& VectorizableDense<MatrixPolicy>&& VectorizableSparse<
SparseMatrixPolicy>) void Solve(MatrixPolicy& x, const SparseMatrixPolicy& L, const SparseMatrixPolicy& U) const
requires(
CudaMatrix<SparseMatrixPolicy> && CudaMatrix<MatrixPolicy> && VectorizableDense<MatrixPolicy> &&
VectorizableSparse<SparseMatrixPolicy>)
void Solve(MatrixPolicy& x, const SparseMatrixPolicy& L, const SparseMatrixPolicy& U) const
{
auto x_param = x.AsDeviceParam(); // we need to update x so it can't be constant and must be an lvalue
micm::cuda::SolveKernelDriver(x_param, L.AsDeviceParam(), U.AsDeviceParam(), this->devstruct_);
Expand Down
21 changes: 12 additions & 9 deletions include/micm/cuda/solver/cuda_lu_decomposition_doolittle.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,10 +85,15 @@ namespace micm
/// @brief Create an LU decomposition algorithm for a given sparse matrix policy
/// @param matrix Sparse matrix
template<class SparseMatrixPolicy, class LMatrixPolicy = SparseMatrixPolicy, class UMatrixPolicy = SparseMatrixPolicy>
requires(SparseMatrixConcept<SparseMatrixPolicy>) static CudaLuDecompositionDoolittle Create(const SparseMatrixPolicy& matrix)
requires(SparseMatrixConcept<SparseMatrixPolicy>)
static CudaLuDecompositionDoolittle Create(const SparseMatrixPolicy& matrix)
{
static_assert(std::is_same_v<SparseMatrixPolicy, LMatrixPolicy>, "SparseMatrixPolicy must be the same as LMatrixPolicy for CUDA LU decomposition");
static_assert(std::is_same_v<SparseMatrixPolicy, UMatrixPolicy>, "SparseMatrixPolicy must be the same as UMatrixPolicy for CUDA LU decomposition");
static_assert(
std::is_same_v<SparseMatrixPolicy, LMatrixPolicy>,
"SparseMatrixPolicy must be the same as LMatrixPolicy for CUDA LU decomposition");
static_assert(
std::is_same_v<SparseMatrixPolicy, UMatrixPolicy>,
"SparseMatrixPolicy must be the same as UMatrixPolicy for CUDA LU decomposition");
CudaLuDecompositionDoolittle lu_decomp(matrix);
return lu_decomp;
}
Expand All @@ -98,15 +103,13 @@ namespace micm
/// @param L is the lower triangular matrix created by decomposition
/// @param U is the upper triangular matrix created by decomposition
template<class SparseMatrixPolicy>
requires(CudaMatrix<SparseMatrixPolicy>&& VectorizableSparse<SparseMatrixPolicy>) void Decompose(
const SparseMatrixPolicy& A,
auto& L,
auto& U) const;
requires(CudaMatrix<SparseMatrixPolicy> && VectorizableSparse<SparseMatrixPolicy>)
void Decompose(const SparseMatrixPolicy& A, auto& L, auto& U) const;
};

template<class SparseMatrixPolicy>
requires(CudaMatrix<SparseMatrixPolicy>&& VectorizableSparse<SparseMatrixPolicy>) void CudaLuDecompositionDoolittle::
Decompose(const SparseMatrixPolicy& A, auto& L, auto& U) const
requires(CudaMatrix<SparseMatrixPolicy> && VectorizableSparse<SparseMatrixPolicy>)
void CudaLuDecompositionDoolittle::Decompose(const SparseMatrixPolicy& A, auto& L, auto& U) const
{
auto L_param = L.AsDeviceParam(); // we need to update lower matrix so it can't be constant and must be an lvalue
auto U_param = U.AsDeviceParam(); // we need to update upper matrix so it can't be constant and must be an lvalue
Expand Down
5 changes: 3 additions & 2 deletions include/micm/cuda/solver/cuda_rosenbrock.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ namespace micm
/// @param alpha
template<class SparseMatrixPolicy>
void AlphaMinusJacobian(SparseMatrixPolicy& jacobian, const double& alpha) const
requires(CudaMatrix<SparseMatrixPolicy>&& VectorizableSparse<SparseMatrixPolicy>)
requires(CudaMatrix<SparseMatrixPolicy> && VectorizableSparse<SparseMatrixPolicy>)
{
auto jacobian_param =
jacobian.AsDeviceParam(); // we need to update jacobian so it can't be constant and must be an lvalue
Expand All @@ -116,7 +116,8 @@ namespace micm
/// @return The scaled norm of the errors
template<class DenseMatrixPolicy>
double NormalizedError(const DenseMatrixPolicy& y_old, const DenseMatrixPolicy& y_new, const DenseMatrixPolicy& errors)
const requires(CudaMatrix<DenseMatrixPolicy>&& VectorizableDense<DenseMatrixPolicy>)
const
requires(CudaMatrix<DenseMatrixPolicy> && VectorizableDense<DenseMatrixPolicy>)
{
return micm::cuda::NormalizedErrorDriver(
y_old.AsDeviceParam(), y_new.AsDeviceParam(), errors.AsDeviceParam(), this->parameters_, this->devstruct_);
Expand Down
6 changes: 4 additions & 2 deletions include/micm/cuda/solver/cuda_state.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,16 @@ namespace micm
: State<DenseMatrixPolicy, SparseMatrixPolicy, LuDecompositionPolicy>(parameters){};

/// @brief Copy input variables to the device
void SyncInputsToDevice() requires(CudaMatrix<DenseMatrixPolicy>&& VectorizableDense<DenseMatrixPolicy>)
void SyncInputsToDevice()
requires(CudaMatrix<DenseMatrixPolicy> && VectorizableDense<DenseMatrixPolicy>)
{
this->variables_.CopyToDevice();
this->rate_constants_.CopyToDevice();
}

/// @brief Copy output variables to the host
void SyncOutputsToHost() requires(CudaMatrix<DenseMatrixPolicy>&& VectorizableDense<DenseMatrixPolicy>)
void SyncOutputsToHost()
requires(CudaMatrix<DenseMatrixPolicy> && VectorizableDense<DenseMatrixPolicy>)
{
this->variables_.CopyToHost();
}
Expand Down
31 changes: 20 additions & 11 deletions include/micm/cuda/util/cuda_dense_matrix.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,17 +42,10 @@ namespace micm

/// Concept for Cuda Matrix
template<typename MatrixType>
concept CudaMatrix = requires(MatrixType t)
{
{
t.CopyToDevice()
} -> std::same_as<void>;
{
t.CopyToHost()
} -> std::same_as<void>;
{
t.AsDeviceParam()
} -> std::same_as<CudaMatrixParam>;
concept CudaMatrix = requires(MatrixType t) {
{ t.CopyToDevice() } -> std::same_as<void>;
{ t.CopyToHost() } -> std::same_as<void>;
{ t.AsDeviceParam() } -> std::same_as<CudaMatrixParam>;
};

template<class T, std::size_t L = MICM_DEFAULT_VECTOR_SIZE>
Expand Down Expand Up @@ -192,6 +185,22 @@ namespace micm
"CUBLAS Daxpy operation failed...");
}

/// @brief For each element of the VectorMatrix, perform y = max(y, x), where x is a scalar constant
/// @param x The scalar constant to compare against
void Max(const T x)
{
static_assert(std::is_same_v<T, double>);
CHECK_CUDA_ERROR(micm::cuda::MatrixMax(this->param_, x), "CudaMatrixMax");
}

/// @brief For each element of the VectorMatrix, perform y = min(y, x), where x is a scalar constant
/// @param x The scalar constant to compare against
void Min(const T x)
{
static_assert(std::is_same_v<T, double>);
CHECK_CUDA_ERROR(micm::cuda::MatrixMin(this->param_, x), "CudaMatrixMin");
}

// Copy the device data from the other Cuda dense matrix into this one
void Copy(const CudaDenseMatrix& other)
{
Expand Down
12 changes: 12 additions & 0 deletions include/micm/cuda/util/cuda_matrix.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,18 @@ namespace micm
/// @returns Error code from free-ing data on device, if any
cudaError_t FreeVector(CudaMatrixParam& vectorMatrix);

/// @brief Sets all elements in a CUDA matrix to their current value or a specified value, whichever is greater
/// @param vectorMatrix Struct containing allocated device memory
/// @param val Value to compare with each element in the matrix
template<typename T>
cudaError_t MatrixMax(CudaMatrixParam& vectorMatrix, T val);

/// @brief Sets all elements in a CUDA matrix to their current value or a specified value, whichever is lesser
/// @param vectorMatrix Struct containing allocated device memory
/// @param val Value to compare with each element in the matrix
template<typename T>
cudaError_t MatrixMin(CudaMatrixParam& vectorMatrix, T val);

/// @brief Copies data from the host to the device
/// @param vectorMatrix Struct containing allocated device memory
/// @param h_data Host data to copy from
Expand Down
4 changes: 2 additions & 2 deletions include/micm/jit/solver/jit_linear_solver.inl
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ namespace micm
}

template<std::size_t L, class SparseMatrixPolicy, class LuDecompositionPolicy>
inline JitLinearSolver<L, SparseMatrixPolicy, LuDecompositionPolicy>
&JitLinearSolver<L, SparseMatrixPolicy, LuDecompositionPolicy>::operator=(JitLinearSolver &&other)
inline JitLinearSolver<L, SparseMatrixPolicy, LuDecompositionPolicy> &
JitLinearSolver<L, SparseMatrixPolicy, LuDecompositionPolicy>::operator=(JitLinearSolver &&other)
{
LinearSolver<SparseMatrixPolicy, LuDecompositionPolicy>::operator=(std::move(other));
solve_function_resource_tracker_ = std::move(other.solve_function_resource_tracker_);
Expand Down
5 changes: 3 additions & 2 deletions include/micm/jit/solver/jit_lu_decomposition_doolittle.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,14 +38,15 @@ namespace micm
/// @brief Create an LU decomposition algorithm for a given sparse matrix policy
/// @param matrix Sparse matrix
template<class SparseMatrixPolicy, class LMatrixPolicy = SparseMatrixPolicy, class UMatrixPolicy = SparseMatrixPolicy>
requires(SparseMatrixConcept<SparseMatrixPolicy>) static JitLuDecompositionDoolittle<L> Create(const SparseMatrixPolicy& matrix);
requires(SparseMatrixConcept<SparseMatrixPolicy>)
static JitLuDecompositionDoolittle<L> Create(const SparseMatrixPolicy &matrix);

/// @brief Create sparse L and U matrices for a given A matrix
/// @param A Sparse matrix that will be decomposed
/// @param lower The lower triangular matrix created by decomposition
/// @param upper The upper triangular matrix created by decomposition
template<class SparseMatrixPolicy>
void Decompose(const SparseMatrixPolicy &A, auto& lower, auto& upper) const;
void Decompose(const SparseMatrixPolicy &A, auto &lower, auto &upper) const;

private:
/// @brief Generates a function to perform the LU decomposition for a specific matrix sparsity structure
Expand Down
21 changes: 11 additions & 10 deletions include/micm/jit/solver/jit_lu_decomposition_doolittle.inl
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@ namespace micm
std::to_string(L);
throw std::system_error(make_error_code(MicmJitErrc::InvalidMatrix), msg);
}
Initialize<SparseMatrixPolicy, SparseMatrixPolicy, SparseMatrixPolicy>(matrix, typename SparseMatrixPolicy::value_type());
Initialize<SparseMatrixPolicy, SparseMatrixPolicy, SparseMatrixPolicy>(
matrix, typename SparseMatrixPolicy::value_type());
GenerateDecomposeFunction();
}

Expand All @@ -54,12 +55,15 @@ namespace micm

template<std::size_t L>
template<class SparseMatrixPolicy, class LMatrixPolicy, class UMatrixPolicy>
requires(SparseMatrixConcept<SparseMatrixPolicy>)
inline JitLuDecompositionDoolittle<L> JitLuDecompositionDoolittle<L>::Create(
const SparseMatrixPolicy& matrix)
requires(SparseMatrixConcept<SparseMatrixPolicy>)
inline JitLuDecompositionDoolittle<L> JitLuDecompositionDoolittle<L>::Create(const SparseMatrixPolicy &matrix)
{
static_assert(std::is_same_v<SparseMatrixPolicy, LMatrixPolicy>, "SparseMatrixPolicy must be the same as LMatrixPolicy for JIT LU decomposition");
static_assert(std::is_same_v<SparseMatrixPolicy, UMatrixPolicy>, "SparseMatrixPolicy must be the same as UMatrixPolicy for JIT LU decomposition");
static_assert(
std::is_same_v<SparseMatrixPolicy, LMatrixPolicy>,
"SparseMatrixPolicy must be the same as LMatrixPolicy for JIT LU decomposition");
static_assert(
std::is_same_v<SparseMatrixPolicy, UMatrixPolicy>,
"SparseMatrixPolicy must be the same as UMatrixPolicy for JIT LU decomposition");
JitLuDecompositionDoolittle<L> lu_decomp(matrix);
return lu_decomp;
}
Expand Down Expand Up @@ -220,10 +224,7 @@ namespace micm

template<std::size_t L>
template<class SparseMatrixPolicy>
void JitLuDecompositionDoolittle<L>::Decompose(
const SparseMatrixPolicy &A,
auto& lower,
auto& upper) const
void JitLuDecompositionDoolittle<L>::Decompose(const SparseMatrixPolicy &A, auto &lower, auto &upper) const
{
decompose_function_(A.AsVector().data(), lower.AsVector().data(), upper.AsVector().data());
for (size_t block = 0; block < A.NumberOfBlocks(); ++block)
Expand Down
40 changes: 32 additions & 8 deletions include/micm/process/process.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,12 +84,24 @@ namespace micm
/// @brief Recalculate the rate constants for each process for the current state
/// @param processes The set of processes being solved
/// @param state The solver state to update
template<class DenseMatrixPolicy, class SparseMatrixPolicy, class LuDecompositionPolicy, class LMatrixPolicy, class UMatrixPolicy>
requires(!VectorizableDense<DenseMatrixPolicy>) static void CalculateRateConstants(
template<
class DenseMatrixPolicy,
class SparseMatrixPolicy,
class LuDecompositionPolicy,
class LMatrixPolicy,
class UMatrixPolicy>
requires(!VectorizableDense<DenseMatrixPolicy>)
static void CalculateRateConstants(
const std::vector<Process>& processes,
State<DenseMatrixPolicy, SparseMatrixPolicy, LuDecompositionPolicy, LMatrixPolicy, UMatrixPolicy>& state);
template<class DenseMatrixPolicy, class SparseMatrixPolicy, class LuDecompositionPolicy, class LMatrixPolicy, class UMatrixPolicy>
requires(VectorizableDense<DenseMatrixPolicy>) static void CalculateRateConstants(
template<
class DenseMatrixPolicy,
class SparseMatrixPolicy,
class LuDecompositionPolicy,
class LMatrixPolicy,
class UMatrixPolicy>
requires(VectorizableDense<DenseMatrixPolicy>)
static void CalculateRateConstants(
const std::vector<Process>& processes,
State<DenseMatrixPolicy, SparseMatrixPolicy, LuDecompositionPolicy, LMatrixPolicy, UMatrixPolicy>& state);

Expand Down Expand Up @@ -147,8 +159,14 @@ namespace micm
ProcessBuilder& SetPhase(const Phase& phase);
};

template<class DenseMatrixPolicy, class SparseMatrixPolicy, class LuDecompositionPolicy, class LMatrixPolicy, class UMatrixPolicy>
requires(!VectorizableDense<DenseMatrixPolicy>) void Process::CalculateRateConstants(
template<
class DenseMatrixPolicy,
class SparseMatrixPolicy,
class LuDecompositionPolicy,
class LMatrixPolicy,
class UMatrixPolicy>
requires(!VectorizableDense<DenseMatrixPolicy>)
void Process::CalculateRateConstants(
const std::vector<Process>& processes,
State<DenseMatrixPolicy, SparseMatrixPolicy, LuDecompositionPolicy, LMatrixPolicy, UMatrixPolicy>& state)
{
Expand All @@ -172,8 +190,14 @@ namespace micm
}
}

template<class DenseMatrixPolicy, class SparseMatrixPolicy, class LuDecompositionPolicy, class LMatrixPolicy, class UMatrixPolicy>
requires(VectorizableDense<DenseMatrixPolicy>) void Process::CalculateRateConstants(
template<
class DenseMatrixPolicy,
class SparseMatrixPolicy,
class LuDecompositionPolicy,
class LMatrixPolicy,
class UMatrixPolicy>
requires(VectorizableDense<DenseMatrixPolicy>)
void Process::CalculateRateConstants(
const std::vector<Process>& processes,
State<DenseMatrixPolicy, SparseMatrixPolicy, LuDecompositionPolicy, LMatrixPolicy, UMatrixPolicy>& state)
{
Expand Down
Loading

0 comments on commit 0c93161

Please sign in to comment.