From b3958d62265a98f8f60017dbab5ff14a99d7bac0 Mon Sep 17 00:00:00 2001
From: Jack Leary
Date: Tue, 22 Oct 2024 13:36:27 -0400
Subject: [PATCH] cleaned up several of the scoring fns and improved c++ fns

---
 NAMESPACE                     |  3 --
 R/marge2.R                    |  9 ++---
 R/score_fun_gee.R             | 60 +++++++++----------
 R/score_fun_glm.R             | 32 +++++------------
 R/stat_out_score_gee_null.R   | 66 ++++++++++-------------
 R/stat_out_score_glm_null.R   | 16 +++------
 man/marge2.Rd                 |  2 +-
 src/eigenMapInvert.cpp        |  6 ++--
 src/eigenMapMatMult.cpp       |  4 ++-
 src/eigenMapPseudoInverse.cpp |  8 +++--
 10 files changed, 64 insertions(+), 142 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index 915f26b..91f24bd 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -55,19 +55,16 @@ importFrom(dplyr,contains)
 importFrom(dplyr,desc)
 importFrom(dplyr,distinct)
 importFrom(dplyr,filter)
-importFrom(dplyr,group_by)
 importFrom(dplyr,if_else)
 importFrom(dplyr,inner_join)
 importFrom(dplyr,lag)
 importFrom(dplyr,lead)
 importFrom(dplyr,mutate)
-importFrom(dplyr,ntile)
 importFrom(dplyr,pull)
 importFrom(dplyr,relocate)
 importFrom(dplyr,rename)
 importFrom(dplyr,row_number)
 importFrom(dplyr,rowwise)
-importFrom(dplyr,sample_frac)
 importFrom(dplyr,select)
 importFrom(dplyr,slice_head)
 importFrom(dplyr,summarise)
diff --git a/R/marge2.R b/R/marge2.R
index 4e24a08..9cd95a9 100644
--- a/R/marge2.R
+++ b/R/marge2.R
@@ -8,7 +8,7 @@
 #' @description MARS fitting function for negative binomial generalized linear models (GLMs) & generalized estimating equations (GEEs).
 #' @import glm2
 #' @import magrittr
-#' @importFrom dplyr mutate ntile group_by sample_frac
+#' @importFrom dplyr mutate
 #' @importFrom geeM geem
 #' @importFrom MASS glm.nb negative.binomial theta.mm
 #' @importFrom utils tail
@@ -24,7 +24,7 @@
 #' @param cor.structure If \code{is.gee = TRUE}, a string specifying the desired correlation structure for the NB GEE. Defaults to "ar1".
 #' @param sandwich.var (Optional) Should the sandwich variance estimator be used instead of the model-based estimator? Default to FALSE.
 #' @param approx.knot (Optional) Should the set of candidate knots be subsampled in order to speed up computation? This has little effect on the final fit, but can improve computation time somewhat. Defaults to TRUE.
-#' @param n.knot.max (Optional) The maximum number of candidate knots to consider. Uses random sampling (don't worry, a random seed is set internally) to select this number of unique values from the reduced set of all candidate knots. Defaults to 50.
+#' @param n.knot.max (Optional) The maximum number of candidate knots to consider. Uses uniform sampling to select this number of unique values from the reduced set of all candidate knots. Defaults to 50.
 #' @param glm.backend (Optional) Character specifying which GLM-fitting backend should be used. Must be one of "MASS" or "speedglm". Defaults to "MASS".
 #' @param tols_score (Optional) The set tolerance for monitoring the convergence for the difference in score statistics between the parent and candidate model (this is the lack-of-fit criterion used for MARGE). Defaults to 0.00001.
 #' @param minspan (Optional) A set minimum span value. Defaults to NULL.
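
Note on the n.knot.max behavior documented above: selecting a fixed number of candidate knots by uniform (evenly spaced) sampling over the sorted unique values, rather than seeded random sampling, keeps knot selection deterministic across runs. A minimal sketch of that kind of selection follows; the helper name subsample_knots() and the exact indexing are illustrative assumptions, not the package's internal implementation.

# Hedged sketch only: pick at most n_knot_max candidate knots by taking evenly
# spaced positions across the sorted unique predictor values. subsample_knots()
# is a hypothetical helper for illustration, not part of the marge2() API.
subsample_knots <- function(x, n_knot_max = 50L) {
  knots <- sort(unique(x))
  if (length(knots) <= n_knot_max) {
    return(knots)
  }
  idx <- unique(round(seq(1L, length(knots), length.out = n_knot_max)))
  knots[idx]
}

subsample_knots(rpois(500, lambda = 20))  # e.g., candidate knots from simulated counts
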
@@ -180,9 +180,7 @@ marge2 <- function(X_pred = NULL,
   score_knot_both_add_mat <- NULL
   score_knot_one_int_mat <- NULL
   score_knot_one_add_mat <- NULL
-  int.count1 <- 0
-
   in.set <- ifelse(ncol(B) > 1, sum(!var_name_vec %in% var_name), 0)
   for (t in seq_along(X_red)) {
@@ -825,8 +823,7 @@ marge2 <- function(X_pred = NULL,
   }
   if (!is.glmm) {
     if (!is.null(Y.offset)) {
-      model_df <- dplyr::mutate(model_df,
-                                cell_offset = Y.offset)
+      model_df <- dplyr::mutate(model_df, cell_offset = Y.offset)
       model_formula <- paste0(model_formula, " + ", "offset(log(1 / cell_offset))") # this is correct i promise
     }
     model_formula <- stats::as.formula(model_formula)
diff --git a/R/score_fun_gee.R b/R/score_fun_gee.R
index adde7df..cbef03a 100644
--- a/R/score_fun_gee.R
+++ b/R/score_fun_gee.R
@@ -57,56 +57,28 @@ score_fun_gee <- function(Y = NULL,
      AWA.est_i <- AWA.est_list[[i]]
      J2_i <- J2_list[[i]]
      Sigma2_i <- Sigma2_list[[i]]
-     D.est_i <- eigenMapMatMult(A = diag(mu.est[(sum(n_vec1[seq(i)]) + 1):k], nrow = n_vec[i], ncol = n_vec[i]),
-                                B = XA[(sum(n_vec1[seq(i)]) + 1):k, ],
-                                n_cores = 1)
+     D.est_i <- eigenMapMatMult(A = diag(mu.est[(sum(n_vec1[seq(i)]) + 1):k], nrow = n_vec[i], ncol = n_vec[i]), B = XA[(sum(n_vec1[seq(i)]) + 1):k, ])
      D_est_i_transpose <- t(D.est_i)
-     J21 <- J21 + eigenMapMatMult(A = D_est_i_transpose,
-                                  B = t(J2_i),
-                                  n_cores = 1)
-     Sigma21 <- Sigma21 + eigenMapMatMult(A = D_est_i_transpose,
-                                          B = t(Sigma2_i),
-                                          n_cores = 1)
-     B.est <- B.est + eigenMapMatMult(A = D_est_i_transpose,
-                                      B = VS.est_i,
-                                      n_cores = 1)
-     temp_prod <- eigenMapMatMult(A = D_est_i_transpose,
-                                  B = AWA.est_i,
-                                  n_cores = 1)
-     Sigma22 <- Sigma22 + eigenMapMatMult(A = temp_prod,
-                                          B = D.est_i,
-                                          n_cores = 1)
+     J21 <- J21 + eigenMapMatMult(A = D_est_i_transpose, B = t(J2_i))
+     Sigma21 <- Sigma21 + eigenMapMatMult(A = D_est_i_transpose, B = t(Sigma2_i))
+     B.est <- B.est + eigenMapMatMult(A = D_est_i_transpose, B = VS.est_i)
+     temp_prod <- eigenMapMatMult(A = D_est_i_transpose, B = AWA.est_i)
+     Sigma22 <- Sigma22 + eigenMapMatMult(A = temp_prod, B = D.est_i)
     }
-    temp_prod_1 <- eigenMapMatMult(A = J21,
-                                   B = J11.inv,
-                                   n_cores = 1)
-    temp_prod_1 <- eigenMapMatMult(A = temp_prod_1,
-                                   B = t(Sigma21),
-                                   n_cores = 1)
-    temp_prod_2 <- eigenMapMatMult(A = Sigma21,
-                                   B = J11.inv,
-                                   n_cores = 1)
+    temp_prod_1 <- eigenMapMatMult(A = J21, B = J11.inv)
+    temp_prod_1 <- eigenMapMatMult(A = temp_prod_1, B = t(Sigma21))
+    temp_prod_2 <- eigenMapMatMult(A = Sigma21, B = J11.inv)
     J21_transpose <- t(J21)
-    temp_prod_2 <- eigenMapMatMult(A = temp_prod_2,
-                                   B = J21_transpose,
-                                   n_cores = 1)
-    temp_prod_3 <- eigenMapMatMult(A = J21,
-                                   B = JSigma11,
-                                   n_cores = 1)
-    temp_prod_3 <- eigenMapMatMult(A = temp_prod_3,
-                                   B = J21_transpose,
-                                   n_cores = 1)
+    temp_prod_2 <- eigenMapMatMult(A = temp_prod_2, B = J21_transpose)
+    temp_prod_3 <- eigenMapMatMult(A = J21, B = JSigma11)
+    temp_prod_3 <- eigenMapMatMult(A = temp_prod_3, B = J21_transpose)
     Sigma <- Sigma22 - temp_prod_1 - temp_prod_2 + temp_prod_3
-    Sigma_inv <- try({ eigenMapMatrixInvert(Sigma, n_cores = 1L) }, silent = TRUE)
+    Sigma_inv <- try({ eigenMapMatrixInvert(Sigma) }, silent = TRUE)
     if (inherits(Sigma_inv, "try-error")) {
-      Sigma_inv <- eigenMapPseudoInverse(Sigma, n_cores = 1L)
+      Sigma_inv <- eigenMapPseudoInverse(Sigma)
    }
-    temp_prod <- eigenMapMatMult(A = t(B.est),
-                                 B = Sigma_inv,
-                                 n_cores = 1)
-    score <- eigenMapMatMult(A = temp_prod,
-                             B = B.est,
-                             n_cores = 1)
+    temp_prod <- eigenMapMatMult(A = t(B.est), B = Sigma_inv)
+    score <- eigenMapMatMult(A = temp_prod, B = B.est)
   }
   res <- list(score = score)
   return(res)
diff --git a/R/score_fun_glm.R b/R/score_fun_glm.R
index 36b6d1f..8aa00f9 100644
--- a/R/score_fun_glm.R
+++ b/R/score_fun_glm.R
@@ -36,32 +36,18 @@ score_fun_glm <- function(Y = NULL,
    VS.est_i <- unlist(VS.est_list)
    A_list_i <- unlist(A_list)
    B1_list_i <- unlist(B1_list)
-   B_list_i <- eigenMapMatMult(A = B1_list_i,
-                               B = XA,
-                               n_cores = 1)
-   D_list_i <- eigenMapMatMult(A = t(XA),
-                               B = (XA * c(mu.est^2 / V.est)),
-                               n_cores = 1)
-   temp_prod <- eigenMapMatMult(A = t(B_list_i),
-                                B = A_list_i,
-                                n_cores = 1)
-   temp_prod <- eigenMapMatMult(A = temp_prod,
-                                B = B_list_i,
-                                n_cores = 1)
+   B_list_i <- eigenMapMatMult(A = B1_list_i, B = XA)
+   D_list_i <- eigenMapMatMult(A = t(XA), B = (XA * c(mu.est^2 / V.est)))
+   temp_prod <- eigenMapMatMult(A = t(B_list_i), B = A_list_i)
+   temp_prod <- eigenMapMatMult(A = temp_prod, B = B_list_i)
    inv.XVX_22 <- D_list_i - temp_prod
-   B.est <- eigenMapMatMult(A = t(mu.est * VS.est_i),
-                            B = XA,
-                            n_cores = 1)
-   XVX_22 <- try({ eigenMapMatrixInvert(inv.XVX_22, n_cores = 1L) }, silent = TRUE)
+   B.est <- eigenMapMatMult(A = t(mu.est * VS.est_i), B = XA)
+   XVX_22 <- try({ eigenMapMatrixInvert(inv.XVX_22) }, silent = TRUE)
    if (inherits(XVX_22, "try-error")) {
-     XVX_22 <- eigenMapPseudoInverse(inv.XVX_22, n_cores = 1L)
+     XVX_22 <- eigenMapPseudoInverse(inv.XVX_22)
    }
-   temp_prod <- eigenMapMatMult(A = B.est,
-                                B = XVX_22,
-                                n_cores = 1)
-   score <- eigenMapMatMult(A = temp_prod,
-                            B = t(B.est),
-                            n_cores = 1)
+   temp_prod <- eigenMapMatMult(A = B.est, B = XVX_22)
+   score <- eigenMapMatMult(A = temp_prod, B = t(B.est))
  }
  res <- list(score = score)
  return(res)
diff --git a/R/stat_out_score_gee_null.R b/R/stat_out_score_gee_null.R
index 4fea157..667bb20 100644
--- a/R/stat_out_score_gee_null.R
+++ b/R/stat_out_score_gee_null.R
@@ -66,52 +66,26 @@ stat_out_score_gee_null <- function(Y = NULL,
    diag_sqrt_V_est <- diag(sqrt(V_est[temp_seq_n]),
                            nrow = n_vec[i],
                            ncol = n_vec[i])
-   temp_prod <- eigenMapMatMult(A = diag_sqrt_V_est,
-                                B = R_alpha,
-                                n_cores = 1)
-   V_est_i <- eigenMapMatMult(A = temp_prod,
-                              B = diag_sqrt_V_est,
-                              n_cores = 1)
-   V_est_i_inv <- try({ eigenMapMatrixInvert(V_est_i, n_cores = 1L) }, silent = TRUE)
+   temp_prod <- eigenMapMatMult(A = diag_sqrt_V_est, B = R_alpha)
+   V_est_i <- eigenMapMatMult(A = temp_prod, B = diag_sqrt_V_est)
+   V_est_i_inv <- try({ eigenMapMatrixInvert(V_est_i) }, silent = TRUE)
    if (inherits(V_est_i_inv, "try-error")) {
-     V_est_i_inv <- eigenMapPseudoInverse(V_est_i, n_cores = 1L)
+     V_est_i_inv <- eigenMapPseudoInverse(V_est_i)
    }
    S_est_i <- t(Y)[temp_seq_n] - mu_est_sub
-   temp_prod <- eigenMapMatMult(A = S_est_i,
-                                B = t(S_est_i),
-                                n_cores = 1)
-   temp_prod <- eigenMapMatMult(A = V_est_i_inv,
-                                B = temp_prod,
-                                n_cores = 1)
-   AWA_est_i <- eigenMapMatMult(A = temp_prod,
-                                B = V_est_i_inv,
-                                n_cores = 1)
-   D_est_i <- eigenMapMatMult(A = diag(mu_est_sub,
-                                       nrow = n_vec[i],
-                                       ncol = n_vec[i]),
-                              B = B_null[temp_seq_n, ],
-                              n_cores = 1)
+   temp_prod <- eigenMapMatMult(A = S_est_i, B = t(S_est_i))
+   temp_prod <- eigenMapMatMult(A = V_est_i_inv, B = temp_prod)
+   AWA_est_i <- eigenMapMatMult(A = temp_prod, B = V_est_i_inv)
+   D_est_i <- eigenMapMatMult(A = diag(mu_est_sub, nrow = n_vec[i], ncol = n_vec[i]), B = B_null[temp_seq_n, ])
    D_est_i_transpose <- t(D_est_i)
-   temp_prod <- eigenMapMatMult(A = D_est_i_transpose,
-                                B = V_est_i_inv,
-                                n_cores = 1)
-   J1_i <- eigenMapMatMult(A = temp_prod,
-                           B = D_est_i,
-                           n_cores = 1)
+   temp_prod <- eigenMapMatMult(A = D_est_i_transpose, B = V_est_i_inv)
+   J1_i <- eigenMapMatMult(A = temp_prod, B = D_est_i)
    J11 <- J11 + J1_i
-   J2_i <- eigenMapMatMult(A = D_est_i_transpose,
-                           B = V_est_i_inv,
-                           n_cores = 1)
-   Sigma2_i <- eigenMapMatMult(A = D_est_i_transpose,
-                               B = AWA_est_i,
-                               n_cores = 1)
-   Sigma1_i <- eigenMapMatMult(A = Sigma2_i,
-                               B = D_est_i,
-                               n_cores = 1)
+   J2_i <- eigenMapMatMult(A = D_est_i_transpose, B = V_est_i_inv)
+   Sigma2_i <- eigenMapMatMult(A = D_est_i_transpose, B = AWA_est_i)
+   Sigma1_i <- eigenMapMatMult(A = Sigma2_i, B = D_est_i)
    Sigma11 <- Sigma11 + Sigma1_i
-   V_est_list_elem_2 <- eigenMapMatMult(A = V_est_i_inv,
-                                        B = S_est_i,
-                                        n_cores = 1)
+   V_est_list_elem_2 <- eigenMapMatMult(A = V_est_i_inv, B = S_est_i)
    VS_est_list[[i]] <- V_est_list_elem_2
    AWA_est_list[[i]] <- AWA_est_i
    J2_list[[i]] <- J2_i
@@ -120,17 +94,13 @@ stat_out_score_gee_null <- function(Y = NULL,
  if (nrow(J11) == 1 && ncol(J11) == 1) {
    J11_inv <- 1 / J11
  } else {
-   J11_inv <- try({ eigenMapMatrixInvert(J11, n_cores = 1L) }, silent = TRUE)
+   J11_inv <- try({ eigenMapMatrixInvert(J11) }, silent = TRUE)
    if (inherits(J11_inv, "try-error")) {
-     J11_inv <- eigenMapPseudoInverse(J11, n_cores = 1L)
+     J11_inv <- eigenMapPseudoInverse(J11)
    }
  }
- temp_prod <- eigenMapMatMult(A = J11_inv,
-                              B = Sigma11,
-                              n_cores = 1)
- JSigma11 <- eigenMapMatMult(A = temp_prod,
-                             B = J11_inv,
-                             n_cores = 1)
+ temp_prod <- eigenMapMatMult(A = J11_inv, B = Sigma11)
+ JSigma11 <- eigenMapMatMult(A = temp_prod, B = J11_inv)
  res <- list(VS.est_list = VS_est_list,
              AWA.est_list = AWA_est_list,
              J2_list = J2_list,
diff --git a/R/stat_out_score_glm_null.R b/R/stat_out_score_glm_null.R
index 0d30dc5..4f017f5 100644
--- a/R/stat_out_score_glm_null.R
+++ b/R/stat_out_score_glm_null.R
@@ -28,23 +28,17 @@ stat_out_score_glm_null <- function(Y = NULL,
                                     B_null = NULL) {
  V_est <- mu_est * (1 + mu_est * sigma_est) # Type I NB variance = mu (1 + mu * sigma); sigma = 1 / theta
  VS_est_list <- (Y - mu_est) / V_est
  mu_V_diag <- diag(c(mu_est^2 / V_est))
- temp_prod <- eigenMapMatMult(A = t(B_null),
-                              B = mu_V_diag,
-                              n_cores = 1)
- A_list_inv <- eigenMapMatMult(A = temp_prod,
-                               B = B_null,
-                               n_cores = 1)
+ temp_prod <- eigenMapMatMult(A = t(B_null), B = mu_V_diag)
+ A_list_inv <- eigenMapMatMult(A = temp_prod, B = B_null)
  if (ncol(A_list_inv) == 1 && nrow(A_list_inv) == 1) {
    A_list <- 1 / A_list_inv
  } else {
-   A_list <- try({ eigenMapMatrixInvert(A_list_inv, n_cores = 1L) }, silent = TRUE)
+   A_list <- try({ eigenMapMatrixInvert(A_list_inv) }, silent = TRUE)
    if (inherits(A_list, "try-error")) {
-     A_list <- eigenMapPseudoInverse(A_list_inv, n_cores = 1L)
+     A_list <- eigenMapPseudoInverse(A_list_inv)
    }
  }
- B1_list <- eigenMapMatMult(A = t(B_null),
-                            B = mu_V_diag,
-                            n_cores = 1)
+ B1_list <- eigenMapMatMult(A = t(B_null), B = mu_V_diag)
  res <- list(VS.est_list = VS_est_list,
              A_list = A_list,
              B1_list = B1_list,
diff --git a/man/marge2.Rd b/man/marge2.Rd
index 962edc1..847124a 100644
--- a/man/marge2.Rd
+++ b/man/marge2.Rd
@@ -45,7 +45,7 @@ marge2(
 \item{approx.knot}{(Optional) Should the set of candidate knots be subsampled in order to speed up computation? This has little effect on the final fit, but can improve computation time somewhat. Defaults to TRUE.}

-\item{n.knot.max}{(Optional) The maximum number of candidate knots to consider. Uses random sampling (don't worry, a random seed is set internally) to select this number of unique values from the reduced set of all candidate knots. Defaults to 50.}
+\item{n.knot.max}{(Optional) The maximum number of candidate knots to consider. Uses uniform sampling to select this number of unique values from the reduced set of all candidate knots. Defaults to 50.}

 \item{glm.backend}{(Optional) Character specifying which GLM-fitting backend should be used. Must be one of "MASS" or "speedglm". Defaults to "MASS".}

diff --git a/src/eigenMapInvert.cpp b/src/eigenMapInvert.cpp
index 6eef2f4..a8d45e3 100644
--- a/src/eigenMapInvert.cpp
+++ b/src/eigenMapInvert.cpp
@@ -3,8 +3,10 @@
 // [[Rcpp::depends(RcppEigen)]]

 // [[Rcpp::export]]
-Eigen::MatrixXd eigenMapMatrixInvert(const Eigen::Map<Eigen::MatrixXd> A, int n_cores = 1){
-  Eigen::setNbThreads(n_cores);
+Eigen::MatrixXd eigenMapMatrixInvert(const Eigen::Map<Eigen::MatrixXd> A, int n_cores = 1) {
+  if (n_cores > 1) {
+    Eigen::setNbThreads(n_cores);
+  }
   Eigen::MatrixXd B = A.llt().solve(Eigen::MatrixXd::Identity(A.rows(), A.cols()));
   return B;
 }
diff --git a/src/eigenMapMatMult.cpp b/src/eigenMapMatMult.cpp
index cf697ef..b760b57 100644
--- a/src/eigenMapMatMult.cpp
+++ b/src/eigenMapMatMult.cpp
@@ -6,7 +6,9 @@
 Eigen::MatrixXd eigenMapMatMult(const Eigen::Map<Eigen::MatrixXd> A,
                                 const Eigen::Map<Eigen::MatrixXd> B,
                                 int n_cores = 1) {
-  Eigen::setNbThreads(n_cores);
+  if (n_cores > 1) {
+    Eigen::setNbThreads(n_cores);
+  }
   Eigen::MatrixXd C;
   C.noalias() = A * B;
   return C;
diff --git a/src/eigenMapPseudoInverse.cpp b/src/eigenMapPseudoInverse.cpp
index 88758ef..c0f369b 100644
--- a/src/eigenMapPseudoInverse.cpp
+++ b/src/eigenMapPseudoInverse.cpp
@@ -3,10 +3,12 @@
 // [[Rcpp::depends(RcppEigen)]]

 // [[Rcpp::export]]
-Eigen::MatrixXd eigenMapPseudoInverse(const Eigen::Map<Eigen::MatrixXd> A, 
-                                      double tolerance = 1e-6, 
+Eigen::MatrixXd eigenMapPseudoInverse(const Eigen::Map<Eigen::MatrixXd> A,
+                                      double tolerance = 1e-6,
                                       int n_cores = 1) {
-  Eigen::setNbThreads(n_cores);
+  if (n_cores > 1) {
+    Eigen::setNbThreads(n_cores);
+  }
   Eigen::JacobiSVD<Eigen::MatrixXd> svd(A, Eigen::ComputeThinU | Eigen::ComputeThinV);
   Eigen::MatrixXd singularValues = svd.singularValues();
   Eigen::MatrixXd singularValuesInv(A.cols(), A.rows());
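
Taken together, the R-side edits above drop the explicit n_cores = 1 arguments (the C++ helpers now default to single-threaded execution and only call Eigen::setNbThreads() when more than one core is requested), while keeping the try()-then-pseudoinverse fallback around every matrix inversion. The sketch below illustrates that fallback pattern with base R stand-ins; solve() and MASS::ginv() play the roles of the package's eigenMapMatrixInvert() and eigenMapPseudoInverse() helpers, so it shows the idea rather than the package's actual code path.

# Hedged sketch of the inversion-with-fallback pattern used in the scoring functions:
# try a fast direct inverse first, then fall back to a Moore-Penrose pseudoinverse
# when the matrix is numerically singular. Base R / MASS stand-ins only.
library(MASS)

safe_invert <- function(M) {
  M_inv <- try(solve(M), silent = TRUE)
  if (inherits(M_inv, "try-error")) {
    M_inv <- MASS::ginv(M) # pseudoinverse handles singular or ill-conditioned input
  }
  M_inv
}

safe_invert(diag(2))                         # well-conditioned: solve() succeeds
safe_invert(matrix(c(1, 2, 2, 4), nrow = 2)) # exactly singular: falls back to ginv()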