Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Changes mandated by CRAN #1374

Merged
merged 6 commits into from
Nov 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
^\.github$
^codecov\.yml$
1 change: 1 addition & 0 deletions .github/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
*.html
21 changes: 12 additions & 9 deletions .github/workflows/R-CMD-check.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,10 @@ on:
push:
branches: [main, master]
pull_request:
branches: [main, master]

name: R-CMD-check
name: R-CMD-check.yaml

permissions: read-all

jobs:
R-CMD-check:
Expand All @@ -24,20 +25,20 @@ jobs:
config:
- {os: macos-latest, r: 'release'}
- {os: windows-latest, r: 'release'}
# Use 3.6 to trigger usage of RTools35
- {os: windows-latest, r: '3.6'}
# use 4.1 to check with rtools40's older compiler
- {os: windows-latest, r: '4.1'}
- {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'}
- {os: ubuntu-latest, r: 'release'}
- {os: ubuntu-latest, r: 'release'}

# use 4.0 or 4.1 to check with rtools40's older compiler
- {os: windows-latest, r: 'oldrel-4'}
- {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'}

- {os: ubuntu-latest, r: 'oldrel-1'}

env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
R_KEEP_PKG_SOURCE: yes

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4

- uses: r-lib/actions/setup-pandoc@v2

Expand All @@ -46,6 +47,7 @@ jobs:
r-version: ${{ matrix.config.r }}
http-user-agent: ${{ matrix.config.http-user-agent }}
use-public-rspm: true
working-directory: pkg/caret

- uses: r-lib/actions/setup-r-dependencies@v2
with:
Expand All @@ -56,4 +58,5 @@ jobs:
- uses: r-lib/actions/check-r-package@v2
with:
upload-snapshots: true
build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")'
working-directory: pkg/caret
12 changes: 9 additions & 3 deletions .github/workflows/pr-commands.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@ on:
issue_comment:
types: [created]

name: Commands
name: pr-commands.yaml

permissions: read-all

jobs:
document:
Expand All @@ -13,8 +15,10 @@ jobs:
runs-on: ubuntu-latest
env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
permissions:
contents: write
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4

- uses: r-lib/actions/pr-fetch@v2
with:
Expand Down Expand Up @@ -51,8 +55,10 @@ jobs:
runs-on: ubuntu-latest
env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
permissions:
contents: write
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4

- uses: r-lib/actions/pr-fetch@v2
with:
Expand Down
29 changes: 21 additions & 8 deletions .github/workflows/test-coverage.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@ on:
push:
branches: [main, master]
pull_request:
branches: [main, master]

name: test-coverage
name: test-coverage.yaml

permissions: read-all

jobs:
test-coverage:
Expand All @@ -15,38 +16,50 @@ jobs:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4

- uses: r-lib/actions/setup-r@v2
with:
use-public-rspm: true
working-directory: pkg/caret

- uses: r-lib/actions/setup-r-dependencies@v2
with:
extra-packages: any::covr
extra-packages: any::covr, any::xml2
needs: coverage
working-directory: pkg/caret

- name: Test coverage
run: |
covr::codecov(
cov <- covr::package_coverage(
quiet = FALSE,
clean = FALSE,
path = "pkg/caret",
install_path = file.path(Sys.getenv("RUNNER_TEMP"), "package")
install_path = file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash = "/"), "package")
)
covr::to_cobertura(cov)
shell: Rscript {0}

- uses: codecov/codecov-action@v4
with:
# Fail if error if not on PR, or if on PR and token is given
fail_ci_if_error: ${{ github.event_name != 'pull_request' || secrets.CODECOV_TOKEN }}
file: ./cobertura.xml
plugin: noop
disable_search: true
token: ${{ secrets.CODECOV_TOKEN }}
working-directory: pkg/caret

- name: Show testthat output
if: always()
run: |
## --------------------------------------------------------------------
find ${{ runner.temp }}/package -name 'testthat.Rout*' -exec cat '{}' \; || true
find '${{ runner.temp }}/package' -name 'testthat.Rout*' -exec cat '{}' \; || true
shell: bash

- name: Upload test results
if: failure()
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
with:
name: coverage-test-failures
path: ${{ runner.temp }}/package
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[![R-CMD-check](https://github.com/topepo/caret/workflows/R-CMD-check/badge.svg)](https://github.com/topepo/caret/actions)
[![Coverage Status](https://coveralls.io/repos/topepo/caret/badge.svg?branch=master)](https://coveralls.io/r/topepo/caret?branch=master)
![R-CMD-check](https://github.com/topepo/caret/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/topepo/caret/actions/workflows/R-CMD-check.yaml)
[![Codecov test coverage](https://codecov.io/gh/topepo/caret/graph/badge.svg)](https://app.codecov.io/gh/topepo/caret)
[![CRAN_Status_Badge](http://www.r-pkg.org/badges/version/caret)](http://cran.r-project.org/web/packages/caret)
[![Downloads](http://cranlogs.r-pkg.org/badges/caret)](http://cran.rstudio.com/package=caret)

Expand Down
3 changes: 1 addition & 2 deletions pkg/caret/DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -107,11 +107,10 @@ Suggests:
rmarkdown,
rpart,
spls,
subselect,
superpc,
testthat (>= 0.9.1),
themis (>= 0.1.3)
VignetteBuilder:
knitr
Encoding: UTF-8
RoxygenNote: 7.2.3
RoxygenNote: 7.3.2
4 changes: 2 additions & 2 deletions pkg/caret/R/calibration.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@
#' \code{envir} argument in \code{eval}, e.g., a list or an environment) containing values for any
#' variables in the formula, as well as \code{groups} and \code{subset} if applicable. If not found in
#' \code{data}, or if \code{data} is unspecified, the variables are looked for in the environment of the
#' formula. This argument is not used for \code{xyplot.calibration}. For {ggplot.calibration}, \code{data}
#' should be an object of class "\code{calibration}"."
#' formula. This argument is not used for \code{xyplot.calibration}. For \code{ggplot.calibration}, \code{data}
#' should be an object of class "\code{calibration}".
#'
#' @param class a character string for the class of interest
#'
Expand Down
70 changes: 32 additions & 38 deletions pkg/caret/R/findCorrelation.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@ findCorrelation_fast <- function(x, cutoff = .90, verbose = FALSE){
averageCorr <- as.numeric(as.factor(averageCorr))
x[lower.tri(x, diag = TRUE)] <- NA
combsAboveCutoff <- which(abs(x) > cutoff)

colsToCheck <- ceiling(combsAboveCutoff / nrow(x))
rowsToCheck <- combsAboveCutoff %% nrow(x)

colsToDiscard <- averageCorr[colsToCheck] > averageCorr[rowsToCheck]
rowsToDiscard <- !colsToDiscard

if(verbose){
colsFlagged <- pmin(ifelse(colsToDiscard, colsToCheck, NA),
ifelse(rowsToDiscard, rowsToCheck, NA), na.rm = TRUE)
Expand All @@ -22,7 +22,7 @@ findCorrelation_fast <- function(x, cutoff = .90, verbose = FALSE){
'\n \t Flagging column', colsFlagged, '\n'
))
}

deletecol <- c(colsToCheck[colsToDiscard], rowsToCheck[rowsToDiscard])
deletecol <- unique(deletecol)
deletecol
Expand All @@ -31,29 +31,29 @@ findCorrelation_fast <- function(x, cutoff = .90, verbose = FALSE){
findCorrelation_exact <- function(x, cutoff = 0.90, verbose = FALSE)
{
varnum <- dim(x)[1]

if (!isTRUE(all.equal(x, t(x)))) stop("correlation matrix is not symmetric")
if (varnum == 1) stop("only one variable given")

x <- abs(x)

# re-ordered columns based on max absolute correlation
originalOrder <- 1:varnum

averageCorr <- function(x) mean(x, na.rm = TRUE)
tmp <- x
diag(tmp) <- NA

maxAbsCorOrder <- order(apply(tmp, 2, averageCorr), decreasing = TRUE)
x <- x[maxAbsCorOrder, maxAbsCorOrder]
newOrder <- originalOrder[maxAbsCorOrder]
rm(tmp)

deletecol <- rep(FALSE, varnum)

x2 <- x
diag(x2) <- NA

for (i in 1:(varnum - 1)) {
if(!any(x2[!is.na(x2)] > cutoff)){
if (verbose) cat("All correlations <=", cutoff, "\n")
Expand All @@ -62,13 +62,13 @@ findCorrelation_exact <- function(x, cutoff = 0.90, verbose = FALSE)
if (deletecol[i]) next
for (j in (i + 1):varnum) {
if (!deletecol[i] & !deletecol[j]) {

if (x[i, j] > cutoff) {
mn1 <- mean(x2[i,], na.rm = TRUE)
mn2 <- mean(x2[-j,], na.rm = TRUE)
if(verbose) cat("Compare row", newOrder[i],
" and column ", newOrder[j],
"with corr ", round(x[i,j], 3), "\n")
if(verbose) cat("Compare row", newOrder[i],
" and column ", newOrder[j],
"with corr ", round(x[i,j], 3), "\n")
if (verbose) cat(" Means: ", round(mn1, 3), "vs", round(mn2, 3))
if (mn1 > mn2) {
deletecol[i] <- TRUE
Expand All @@ -92,28 +92,22 @@ findCorrelation_exact <- function(x, cutoff = 0.90, verbose = FALSE)


#' Determine highly correlated variables
#'
#'
#' This function searches through a correlation matrix and returns a vector of
#' integers corresponding to columns to remove to reduce pair-wise
#' correlations.
#'
#'
#' The absolute values of pair-wise correlations are considered. If two
#' variables have a high correlation, the function looks at the mean absolute
#' correlation of each variable and removes the variable with the largest mean
#' absolute correlation.
#'
#'
#' Using \code{exact = TRUE} will cause the function to re-evaluate the average
#' correlations at each step while \code{exact = FALSE} uses all the
#' correlations regardless of whether they have been eliminated or not. The
#' exact calculations will remove a smaller number of predictors but can be
#' much slower when the problem dimensions are "big".
#'
#' There are several function in the \pkg{subselect} package
#' (\code{\link[subselect:eleaps]{leaps}},
#' \code{\link[subselect:genetic]{genetic}},
#' \code{\link[subselect:anneal]{anneal}}) that can also be used to accomplish
#' the same goal but tend to retain more predictors.
#'
#'
#' @param x A correlation matrix
#' @param cutoff A numeric value for the pair-wise absolute correlation cutoff
#' @param verbose A boolean for printing the details
Expand All @@ -130,38 +124,38 @@ findCorrelation_exact <- function(x, cutoff = 0.90, verbose = FALSE)
#' \code{\link[subselect:anneal]{anneal}}, \code{\link{findLinearCombos}}
#' @keywords manip
#' @examples
#'
#' R1 <- structure(c(1, 0.86, 0.56, 0.32, 0.85, 0.86, 1, 0.01, 0.74, 0.32,
#'
#' R1 <- structure(c(1, 0.86, 0.56, 0.32, 0.85, 0.86, 1, 0.01, 0.74, 0.32,
#' 0.56, 0.01, 1, 0.65, 0.91, 0.32, 0.74, 0.65, 1, 0.36,
#' 0.85, 0.32, 0.91, 0.36, 1),
#' 0.85, 0.32, 0.91, 0.36, 1),
#' .Dim = c(5L, 5L))
#' colnames(R1) <- rownames(R1) <- paste0("x", 1:ncol(R1))
#' R1
#'
#'
#' findCorrelation(R1, cutoff = .6, exact = FALSE)
#' findCorrelation(R1, cutoff = .6, exact = TRUE)
#' findCorrelation(R1, cutoff = .6, exact = TRUE, names = FALSE)
#'
#'
#'
#'
#' R2 <- diag(rep(1, 5))
#' R2[2, 3] <- R2[3, 2] <- .7
#' R2[5, 3] <- R2[3, 5] <- -.7
#' R2[4, 1] <- R2[1, 4] <- -.67
#'
#'
#' corrDF <- expand.grid(row = 1:5, col = 1:5)
#' corrDF$correlation <- as.vector(R2)
#' levelplot(correlation ~ row + col, corrDF)
#'
#'
#' findCorrelation(R2, cutoff = .65, verbose = TRUE)
#'
#'
#' findCorrelation(R2, cutoff = .99, verbose = TRUE)
#'
#'
#' @export findCorrelation
findCorrelation <- function(x, cutoff = 0.90, verbose = FALSE, names = FALSE, exact = ncol(x) < 100) {
if(names & is.null(colnames(x)))
stop("'x' must have column names when `names = TRUE`")
out <- if(exact)
findCorrelation_exact(x = x, cutoff = cutoff, verbose = verbose) else
out <- if(exact)
findCorrelation_exact(x = x, cutoff = cutoff, verbose = verbose) else
findCorrelation_fast(x = x, cutoff = cutoff, verbose = verbose)
out
if(names) out <- colnames(x)[out]
Expand Down
Loading
Loading