
Commit

Remove group_sizes from highest level functions and optimize XTX operation.
JamesYang007 committed Nov 9, 2023
1 parent 7f74499 commit 070700e
Showing 15 changed files with 463 additions and 299 deletions.
16 changes: 9 additions & 7 deletions adelie/solver.py
@@ -169,8 +169,7 @@ def grpnet(
*,
X: np.ndarray,
y: np.ndarray,
groups: np.ndarray,
group_sizes: np.ndarray,
groups: np.ndarray =None,
alpha: float =1,
penalty: np.ndarray =None,
weights: np.ndarray =None,
@@ -204,12 +203,10 @@ def grpnet(
or a ``numpy`` array.
y : (n,) np.ndarray
Response vector.
groups : (G,) np.ndarray
groups : (G,) np.ndarray, optional
List of starting indices to each group where `G` is the number of groups.
``groups[i]`` is the starting index of the ``i`` th group.
group_sizes : (G,) np.ndarray
List of group sizes corresponding to each element in ``groups``.
``group_sizes[i]`` is the group size of the ``i`` th group.
Default is ``np.arange(p)``.
alpha : float, optional
Elastic net parameter.
It must be in the range :math:`[0,1]`.
@@ -319,6 +316,12 @@ def grpnet(
)

n, p = X.rows(), X.cols()

if groups is None:
groups = np.arange(p, dtype=int)
group_sizes = np.concatenate([groups, [p]], dtype=int)
group_sizes = group_sizes[1:] - group_sizes[:-1]

G = len(groups)

if weights is None:
@@ -356,7 +359,6 @@ def grpnet(
y_var=y_var,
resid=resid,
groups=groups,
group_sizes=group_sizes,
alpha=alpha,
penalty=penalty,
weights=weights,
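With this change ``groups`` becomes optional and ``group_sizes`` is derived internally by differencing the group start indices against ``p``. A minimal NumPy sketch of that derivation (standalone illustration; the variable values are hypothetical):

    import numpy as np

    p = 5

    # Default: every feature is its own group of size one.
    groups = np.arange(p, dtype=int)
    group_sizes = np.concatenate([groups, [p]], dtype=int)
    group_sizes = group_sizes[1:] - group_sizes[:-1]
    print(group_sizes)  # [1 1 1 1 1]

    # Explicit group starts: the same differencing recovers the sizes.
    groups = np.array([0, 2, 3], dtype=int)
    print(np.diff(np.concatenate([groups, [p]])))  # [2 1 2]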
18 changes: 18 additions & 0 deletions adelie/src/include/adelie_core/matrix/matrix_naive_base.hpp
@@ -86,6 +86,24 @@ class MatrixNaiveBase
Eigen::Ref<vec_value_t> out
) =0;

/**
* @brief Computes the weighted covariance matrix for columns [j, j+q).
*
* @param j begin column index.
* @param q number of columns.
* @param sqrt_weights square-root of the weights.
* @param out resulting covariance matrix.
* @param buffer (n, q) extra buffer space if needed.
*/
virtual void cov(
int j, int q,
const Eigen::Ref<const vec_value_t>& sqrt_weights,
Eigen::Ref<colmat_value_t> out,
Eigen::Ref<colmat_value_t> buffer
) const =0;

/**
* @brief Returns the number of rows of the represented matrix.
*/
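In NumPy terms, the quantity this ``cov`` interface is specified to produce is the weighted Gram block of columns [j, j+q). A sketch with a hypothetical dense ``X`` standing in for the abstract matrix:

    import numpy as np

    def cov_block(X, j, q, sqrt_weights):
        # (W^{1/2} X_j)' (W^{1/2} X_j) for the column block X_j = X[:, j:j+q]
        Xj = X[:, j:j+q] * sqrt_weights[:, None]
        return Xj.T @ Xj

    rng = np.random.default_rng(0)
    X = rng.standard_normal((10, 6))
    w = rng.uniform(size=10); w /= w.sum()
    out = cov_block(X, 1, 3, np.sqrt(w))  # (3, 3) block of X' W X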
40 changes: 29 additions & 11 deletions adelie/src/include/adelie_core/matrix/matrix_naive_dense.hpp
@@ -102,6 +102,34 @@ class MatrixNaiveDense: public MatrixNaiveBase<typename DenseType::Scalar>
);
}

int rows() const override
{
return _mat.rows();
}

int cols() const override
{
return _mat.cols();
}

void cov(
int j, int q,
const Eigen::Ref<const vec_value_t>& sqrt_weights,
Eigen::Ref<colmat_value_t> out,
Eigen::Ref<colmat_value_t> buffer
) const override
{
auto& Xj = buffer;

Xj.transpose().array() = (
_mat.middleCols(j, q).transpose().array().rowwise() * sqrt_weights
);

Eigen::setNbThreads(_n_threads);
out.noalias() = Xj.transpose() * Xj;
Eigen::setNbThreads(0);
}

void sp_btmul(
int j, int q,
const sp_mat_value_t& v,
@@ -142,20 +170,10 @@ class MatrixNaiveDense: public MatrixNaiveBase<typename DenseType::Scalar>
);
const auto size = block_size + (t < remainder);
for (int j = 0; j < size; ++j) {
out[begin + j] = _mat.col(j).dot(weights.matrix());
out[begin + j] = _mat.col(begin + j).dot(weights.matrix());
}
}
}

int rows() const override
{
return _mat.rows();
}

int cols() const override
{
return _mat.cols();
}
};

} // namespace matrix
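The one-line fix in the means loop above (``_mat.col(begin + j)`` in place of ``_mat.col(j)``) is easy to sanity-check against the block partition it uses. An illustrative NumPy sketch (not the library API):

    import numpy as np

    def weighted_col_means(X, weights, n_blocks=3):
        n, p = X.shape
        out = np.empty(p)
        block, rem = divmod(p, n_blocks)
        for t in range(n_blocks):
            begin = t * block + min(t, rem)
            size = block + (1 if t < rem else 0)
            for j in range(size):
                # The bug read column j; the column must be offset by the block start.
                out[begin + j] = X[:, begin + j] @ weights
        return out

    rng = np.random.default_rng(0)
    X = rng.standard_normal((8, 7))
    w = np.full(8, 1 / 8)
    assert np.allclose(weighted_col_means(X, w), X.T @ w)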
67 changes: 50 additions & 17 deletions adelie/src/include/adelie_core/matrix/matrix_naive_snp_unphased.hpp
@@ -22,7 +22,6 @@ class MatrixNaiveSNPUnphased : public MatrixNaiveBase<ValueType>
using typename base_t::sp_mat_value_t;
using io_t = io::IOSNPUnphased;
using string_t = std::string;
using vec_vec_index_t = util::rowvec_type<vec_index_t>;
using dyn_vec_string_t = std::vector<string_t>;
using dyn_vec_io_t = std::vector<io_t>;

@@ -135,7 +134,6 @@ class MatrixNaiveSNPUnphased : public MatrixNaiveBase<ValueType>
const auto value = io.value(index);

value_t sum = 0;
#pragma omp simd reduction(+:sum)
for (int i = 0; i < inner.size(); ++i) {
sum += v[inner[i]] * value[i];
}
@@ -169,8 +167,7 @@ class MatrixNaiveSNPUnphased : public MatrixNaiveBase<ValueType>
Eigen::Ref<vec_value_t> out
) override
{
const auto n_threads = std::min<size_t>(_n_threads, q);
#pragma omp parallel for schedule(static) num_threads(n_threads)
#pragma omp parallel for schedule(static) num_threads(_n_threads)
for (int t = 0; t < q; ++t)
{
const auto slice = _io_slice_map[j+t];
@@ -180,7 +177,6 @@ class MatrixNaiveSNPUnphased : public MatrixNaiveBase<ValueType>
const auto value = io.value(index);

value_t sum = 0;
#pragma omp simd reduction(+:sum)
for (int i = 0; i < inner.size(); ++i) {
sum += v[inner[i]] * value[i];
}
@@ -217,6 +213,41 @@ class MatrixNaiveSNPUnphased : public MatrixNaiveBase<ValueType>
bmul(0, cols(), v, out);
}

void cov(
int j, int q,
const Eigen::Ref<const vec_value_t>& sqrt_weights,
Eigen::Ref<colmat_value_t> out,
Eigen::Ref<colmat_value_t>
) const override
{
#pragma omp parallel for schedule(static) num_threads(_n_threads)
for (int i1 = 0; i1 < q; ++i1) {
for (int i2 = 0; i2 <= i1; ++i2) {
const auto slice_1 = _io_slice_map[j+i1];
const auto slice_2 = _io_slice_map[j+i2];
const auto& io_1 = _ios[slice_1];
const auto& io_2 = _ios[slice_2];
const auto index_1 = _io_index_map[j+i1];
const auto index_2 = _io_index_map[j+i2];
const auto inner_1 = io_1.inner(index_1);
const auto inner_2 = io_2.inner(index_2);
const auto value_1 = io_1.value(index_1);
const auto value_2 = io_2.value(index_2);

out(i1, i2) = svsvwdot(
inner_1, value_1,
inner_2, value_2,
sqrt_weights.square()
);
}
}
for (int i1 = 0; i1 < q; ++i1) {
for (int i2 = i1+1; i2 < q; ++i2) {
out(i1, i2) = out(i2, i1);
}
}
}

int rows() const override { return _ios[0].rows(); }
int cols() const override { return _p; }
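The ``cov`` above fills only the lower triangle inside the parallel loop and then mirrors it into the upper triangle. A small sketch of that pattern (hypothetical dense columns stand in for the sparse SNP columns; the C++ version passes ``sqrt_weights.square()`` as the weights):

    import numpy as np

    def cov_lower_then_mirror(cols, weights):
        q = len(cols)
        out = np.empty((q, q))
        for i1 in range(q):              # parallelized in the C++ version
            for i2 in range(i1 + 1):
                out[i1, i2] = np.sum(cols[i1] * cols[i2] * weights)
        for i1 in range(q):              # mirror the strict upper triangle
            for i2 in range(i1 + 1, q):
                out[i1, i2] = out[i2, i1]
        return out

    rng = np.random.default_rng(0)
    cols = [rng.standard_normal(10) for _ in range(4)]
    w = np.full(10, 0.1)
    ref = np.array([[np.sum(a * b * w) for b in cols] for a in cols])
    assert np.allclose(cov_lower_then_mirror(cols, w), ref)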

@@ -227,8 +258,7 @@ class MatrixNaiveSNPUnphased : public MatrixNaiveBase<ValueType>
Eigen::Ref<rowmat_value_t> out
) const override
{
const auto n_threads = std::min<size_t>(_n_threads, v.outerSize());
#pragma omp parallel for schedule(static) num_threads(n_threads)
#pragma omp parallel for schedule(static) num_threads(_n_threads)
for (int k = 0; k < v.outerSize(); ++k) {
typename sp_mat_value_t::InnerIterator it(v, k);
auto out_k = out.row(k);
@@ -253,14 +283,19 @@ class MatrixNaiveSNPUnphased : public MatrixNaiveBase<ValueType>
Eigen::Ref<colmat_value_t> out
) const override
{
auto begin = 0;
while (begin < q) {
const auto slice = _io_slice_map[j+begin];
#pragma omp parallel for schedule(auto) num_threads(_n_threads)
for (int k = 0; k < q; ++k) {
const auto slice = _io_slice_map[j+k];
const auto& io = _ios[slice];
const auto index = _io_index_map[j+begin];
const auto size = std::min<size_t>(q - begin, io.cols() - index);
out.middleCols(begin, size) = io.to_dense(1).middleCols(index, size).template cast<value_t>();
begin += size;
const auto index = _io_index_map[j+k];
const auto inner = io.inner(index);
const auto value = io.value(index);
auto out_k = out.col(k);

out_k.setZero();
for (int i = 0; i < inner.size(); ++i) {
out_k[inner[i]] = value[i];
}
}
}
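The rewritten ``to_dense`` scatters each sparse column independently, which is what makes the per-column parallel loop safe. A sketch of the scatter step with hypothetical ``inner``/``value`` arrays:

    import numpy as np

    def to_dense_block(inners, values, n):
        # inners[k]: sorted row indices of nonzeros in column k; values[k]: their values.
        q = len(inners)
        out = np.zeros((n, q))
        for k in range(q):  # one independent iteration per column
            out[inners[k], k] = values[k]
        return out

    out = to_dense_block([np.array([0, 3]), np.array([2])],
                         [np.array([1.0, 2.0]), np.array([1.0])], n=5)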

@@ -270,8 +305,7 @@ class MatrixNaiveSNPUnphased : public MatrixNaiveBase<ValueType>
) const override
{
const auto p = cols();
const auto n_threads = std::min<size_t>(_n_threads, p);
#pragma omp parallel for schedule(static) num_threads(n_threads)
#pragma omp parallel for schedule(static) num_threads(_n_threads)
for (int j = 0; j < p; ++j)
{
const auto slice = _io_slice_map[j];
Expand All @@ -280,7 +314,6 @@ class MatrixNaiveSNPUnphased : public MatrixNaiveBase<ValueType>
const auto inner = io.inner(index);
const auto value = io.value(index);
value_t sum = 0;
#pragma omp simd reduction(+:sum)
for (int i = 0; i < inner.size(); ++i) {
sum += weights[inner[i]] * value[i];
}
38 changes: 37 additions & 1 deletion adelie/src/include/adelie_core/matrix/utils.hpp
@@ -197,7 +197,7 @@ void dvzero(
const int block_size = n / n_blocks;
const int remainder = n % n_blocks;

#pragma omp parallel for schedule(static) num_threads(n_blocks)
#pragma omp parallel for schedule(static) num_threads(n_threads)
for (int t = 0; t < n_blocks; ++t)
{
const auto begin = (
@@ -209,5 +209,41 @@ void dvzero(
}
}

template <class InnerType, class ValueType, class WeightsType>
ADELIE_CORE_STRONG_INLINE
auto svsvwdot(
const InnerType& inner_1,
const ValueType& value_1,
const InnerType& inner_2,
const ValueType& value_2,
const WeightsType& weights
)
{
using value_t = typename std::decay_t<WeightsType>::Scalar;

int i1 = 0;
int i2 = 0;
value_t sum = 0;
while (
(i1 < inner_1.size()) &&
(i2 < inner_2.size())
) {
while ((i1 < inner_1.size()) && (inner_1[i1] < inner_2[i2])) ++i1;
if (i1 == inner_1.size()) break;
while ((i2 < inner_2.size()) && (inner_2[i2] < inner_1[i1])) ++i2;
if (i2 == inner_2.size()) break;
while (
(i1 < inner_1.size()) &&
(i2 < inner_2.size()) &&
(inner_1[i1] == inner_2[i2])
) {
sum += value_1[i1] * value_2[i2] * weights[inner_1[i1]];
++i1;
++i2;
}
}
return sum;
}

} // namespace matrix
} // namespace adelie_core
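A Python sketch of the same sparse-sparse weighted dot as ``svsvwdot``, assuming sorted index arrays; handy for checking it against a dense computation:

    import numpy as np

    def svsvwdot(inner_1, value_1, inner_2, value_2, weights):
        i1 = i2 = 0
        total = 0.0
        while i1 < len(inner_1) and i2 < len(inner_2):
            if inner_1[i1] < inner_2[i2]:
                i1 += 1
            elif inner_2[i2] < inner_1[i1]:
                i2 += 1
            else:  # matching index: accumulate the weighted product
                total += value_1[i1] * value_2[i2] * weights[inner_1[i1]]
                i1 += 1
                i2 += 1
        return total

    i1, v1 = np.array([0, 2, 5]), np.array([1.0, 2.0, 3.0])
    i2, v2 = np.array([2, 5, 6]), np.array([4.0, 5.0, 6.0])
    w = np.arange(7, dtype=float)
    d1 = np.zeros(7); d1[i1] = v1
    d2 = np.zeros(7); d2[i2] = v2
    assert np.isclose(svsvwdot(i1, v1, i2, v2, w), np.sum(d1 * d2 * w))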
25 changes: 10 additions & 15 deletions adelie/src/include/adelie_core/state/state_basil_naive.hpp
@@ -33,7 +33,6 @@ void update_screen_derived_naive(
const auto& screen_set = state.screen_set;
const auto& screen_begins = state.screen_begins;
const auto intercept = state.intercept;
const auto n_threads = state.n_threads;
auto& X = *state.X;
auto& screen_X_means = state.screen_X_means;
auto& screen_transforms = state.screen_transforms;
@@ -52,37 +51,33 @@ void update_screen_derived_naive(
screen_vars.resize(new_screen_value_size, 0);

// buffers
util::colmat_type<value_t> Xi;
const auto n = X.rows();
const auto max_gs = group_sizes.maxCoeff();
util::colmat_type<value_t> buffer(n, max_gs);
util::colmat_type<value_t> XiTXi;

for (size_t i = old_screen_size; i < new_screen_size; ++i) {
const auto g = groups[screen_set[i]];
const auto gs = group_sizes[screen_set[i]];
const auto sb = screen_begins[i];
const auto n = X.rows();

// get dense version of the group matrix block
Xi.resize(n, gs);
X.to_dense(g, gs, Xi);
// resize output and buffer
auto Xi = buffer.leftCols(gs);
XiTXi.resize(gs, gs);

// compute column-means
Eigen::Map<vec_value_t> Xi_means(
screen_X_means.data() + sb, gs
);
Xi_means = X_means.segment(g, gs);

// if intercept, must center first
// compute weighted covariance matrix
X.cov(g, gs, weights_sqrt, XiTXi, Xi);

if (intercept) {
auto Xia = Xi.array();
matrix::dmvsubi(Xia, Xi_means, n_threads);
XiTXi.noalias() -= Xi_means.matrix().transpose() * Xi_means.matrix();
}

// transform data
Eigen::setNbThreads(n_threads);
Xi.transpose().array().rowwise() *= weights_sqrt;
XiTXi.noalias() = Xi.transpose() * Xi;
Eigen::setNbThreads(0);

Eigen::SelfAdjointEigenSolver<util::colmat_type<value_t>> solver(XiTXi);

/* update screen_transforms */
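The ``XiTXi -= Xi_means' * Xi_means`` correction in the intercept branch relies on the identity (X - 1 mu)' W (X - 1 mu) = X' W X - mu mu', which holds when the weights sum to one and mu is the weighted column mean. A NumPy sketch verifying it under those assumptions:

    import numpy as np

    rng = np.random.default_rng(0)
    n, gs = 20, 3
    X = rng.standard_normal((n, gs))
    w = rng.uniform(size=n); w /= w.sum()   # weights sum to one
    mu = w @ X                              # weighted column means

    centered = (X - mu) * np.sqrt(w)[:, None]
    direct = centered.T @ centered          # Gram of the centered, weighted block
    via_cov = (X * np.sqrt(w)[:, None]).T @ (X * np.sqrt(w)[:, None]) - np.outer(mu, mu)
    assert np.allclose(direct, via_cov)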