diff --git a/DESCRIPTION b/DESCRIPTION index 76dd020f..ca617738 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -18,7 +18,7 @@ Depends: R (>= 4.0) Imports: cli (>= 3.3.0), - dials (>= 1.0.0), + dials (>= 1.3.0), doFuture (>= 1.0.0), dplyr (>= 1.1.0), foreach, @@ -33,13 +33,13 @@ Imports: purrr (>= 1.0.0), recipes (>= 1.0.4), rlang (>= 1.1.0), - rsample (>= 1.2.0), + rsample (>= 1.2.1.9000), tibble (>= 3.1.0), tidyr (>= 1.2.0), tidyselect (>= 1.1.2), vctrs (>= 0.6.1), withr, - workflows (>= 1.1.4), + workflows (>= 1.1.4.9000), yardstick (>= 1.3.0) Suggests: C50, @@ -66,4 +66,4 @@ Encoding: UTF-8 Language: en-US LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.3.1 +RoxygenNote: 7.3.2 diff --git a/NEWS.md b/NEWS.md index 1d25a9ea..2cc4948b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,6 +4,8 @@ * The package will now log a backtrace for errors and warnings that occur during tuning. When a tuning process encounters issues, see the new `trace` column in the `collect_notes(.Last.tune.result)` output to find precisely where the error occurred (#873). +* When automatic grids are used, `dials::grid_space_filling()` is now used (instead of `dials::grid_latin_hypercube()`). Overall, the new function produces optimized designs that do not depend on random numbers. When using Bayesian models, a Latin hypercube design is still used to generate the 5,000 candidates, since producing that many points with pre-optimized designs is too slow. + # tune 1.2.1 * Addressed issue in `int_pctl()` where the function would error when parallelized using `makePSOCKcluster()` (#885). 
diff --git a/R/checks.R b/R/checks.R index b7efd4d9..4f62e45a 100644 --- a/R/checks.R +++ b/R/checks.R @@ -116,7 +116,7 @@ check_grid <- function(grid, workflow, pset = NULL, call = caller_env()) { } check_workflow(workflow, pset = pset, check_dials = TRUE, call = call) - grid <- dials::grid_latin_hypercube(pset, size = grid) + grid <- dials::grid_space_filling(pset, size = grid) grid <- dplyr::distinct(grid) } diff --git a/R/tune_bayes.R b/R/tune_bayes.R index e8eb936e..8a580114 100644 --- a/R/tune_bayes.R +++ b/R/tune_bayes.R @@ -531,7 +531,7 @@ create_initial_set <- function(param, n = NULL, checks) { if (any(checks == "bayes")) { check_bayes_initial_size(nrow(param), n) } - dials::grid_latin_hypercube(param, size = n) + dials::grid_space_filling(param, size = n) } check_iter <- function(iter, call) { @@ -632,7 +632,7 @@ fit_gp <- function(dat, pset, metric, eval_time = NULL, control, ...) { pred_gp <- function(object, pset, size = 5000, current = NULL, control) { pred_grid <- - dials::grid_latin_hypercube(pset, size = size) %>% + dials::grid_space_filling(pset, size = size, type = "latin_hypercube") %>% dplyr::distinct() if (!is.null(current)) { diff --git a/R/tune_grid.R b/R/tune_grid.R index 07a9601c..94034f23 100644 --- a/R/tune_grid.R +++ b/R/tune_grid.R @@ -58,9 +58,8 @@ #' #' @section Parameter Grids: #' -#' If no tuning grid is provided, a semi-random grid (via -#' [dials::grid_latin_hypercube()]) is created with 10 candidate parameter -#' combinations. +#' If no tuning grid is provided, a grid (via [dials::grid_space_filling()]) is +#' created with 10 candidate parameter combinations. #' #' When provided, the grid should have column names for each parameter and #' these should be named by the parameter name or `id`. 
For example, if a diff --git a/inst/WORDLIST b/inst/WORDLIST index 174738ed..af0bf9c3 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -6,7 +6,6 @@ Codecov Davison Disambiguates EI -foreach Hinkley Isomap Lifecycle @@ -15,10 +14,12 @@ Olshen PSOCK RNGkind Wadsworth +backtrace cdot doi el finetune +foreach frac geo ggplot diff --git a/man/tune_grid.Rd b/man/tune_grid.Rd index 560f42ac..fbc2d303 100644 --- a/man/tune_grid.Rd +++ b/man/tune_grid.Rd @@ -107,9 +107,8 @@ end of processing. \section{Parameter Grids}{ -If no tuning grid is provided, a semi-random grid (via -\code{\link[dials:grid_max_entropy]{dials::grid_latin_hypercube()}}) is created with 10 candidate parameter -combinations. +If no tuning grid is provided, a grid (via \code{\link[dials:grid_space_filling]{dials::grid_space_filling()}}) is +created with 10 candidate parameter combinations. When provided, the grid should have column names for each parameter and these should be named by the parameter name or \code{id}. For example, if a diff --git a/tests/testthat/_snaps/bayes.md b/tests/testthat/_snaps/bayes.md index fca3d508..0381d278 100644 --- a/tests/testthat/_snaps/bayes.md +++ b/tests/testthat/_snaps/bayes.md @@ -170,29 +170,29 @@ -- Iteration 1 ----------------------------------------------------------------- - i Current best: rmse=2.453 (@iter 0) + i Current best: rmse=2.461 (@iter 0) i Gaussian process model ! The Gaussian process model is being fit using 1 features but only has 2 data points to do so. This may cause errors or a poor model fit. 
v Gaussian process model i Generating 3 candidates i Predicted candidates - i num_comp=4 + i num_comp=5 i Estimating performance v Estimating performance - (x) Newest results: rmse=2.461 (+/-0.37) + <3 Newest results: rmse=2.453 (+/-0.381) -- Iteration 2 ----------------------------------------------------------------- - i Current best: rmse=2.453 (@iter 0) + i Current best: rmse=2.453 (@iter 1) i Gaussian process model v Gaussian process model i Generating 2 candidates i Predicted candidates - i num_comp=3 + i num_comp=1 i Estimating performance v Estimating performance - <3 Newest results: rmse=2.418 (+/-0.357) + (x) Newest results: rmse=2.646 (+/-0.286) Output # Tuning results # 10-fold cross-validation @@ -225,14 +225,14 @@ -- Iteration 1 ----------------------------------------------------------------- - i Current best: rmse=2.453 (@iter 0) + i Current best: rmse=2.461 (@iter 0) i Gaussian process model ! The Gaussian process model is being fit using 1 features but only has 2 data points to do so. This may cause errors or a poor model fit. 
v Gaussian process model i Generating 3 candidates i Predicted candidates - i num_comp=4 + i num_comp=5 i Estimating performance i Fold01: preprocessor 1/1 v Fold01: preprocessor 1/1 @@ -295,16 +295,16 @@ i Fold10: preprocessor 1/1, model 1/1 (extracts) i Fold10: preprocessor 1/1, model 1/1 (predictions) v Estimating performance - (x) Newest results: rmse=2.461 (+/-0.37) + <3 Newest results: rmse=2.453 (+/-0.381) -- Iteration 2 ----------------------------------------------------------------- - i Current best: rmse=2.453 (@iter 0) + i Current best: rmse=2.453 (@iter 1) i Gaussian process model v Gaussian process model i Generating 2 candidates i Predicted candidates - i num_comp=3 + i num_comp=1 i Estimating performance i Fold01: preprocessor 1/1 v Fold01: preprocessor 1/1 @@ -367,7 +367,7 @@ i Fold10: preprocessor 1/1, model 1/1 (extracts) i Fold10: preprocessor 1/1, model 1/1 (predictions) v Estimating performance - <3 Newest results: rmse=2.418 (+/-0.357) + (x) Newest results: rmse=2.646 (+/-0.286) Output # Tuning results # 10-fold cross-validation @@ -523,12 +523,6 @@ data points to do so. This may cause errors or a poor model fit. ! For the rsq estimates, 1 missing value was found and removed before fitting the Gaussian process model. - ! For the rsq estimates, 1 missing value was found and removed before fitting - the Gaussian process model. - ! For the rsq estimates, 1 missing value was found and removed before fitting - the Gaussian process model. - ! For the rsq estimates, 1 missing value was found and removed before fitting - the Gaussian process model. ! validation: internal: A correlation computation is required, but `estimate` is constant and ha... ! For the rsq estimates, 2 missing values were found and removed before fitting the Gaussian process model. @@ -545,6 +539,16 @@ ! For the rsq estimates, 6 missing values were found and removed before fitting the Gaussian process model. ! 
validation: internal: A correlation computation is required, but `estimate` is constant and ha... + ! For the rsq estimates, 7 missing values were found and removed before + fitting the Gaussian process model. + ! validation: internal: A correlation computation is required, but `estimate` is constant and ha... + ! For the rsq estimates, 8 missing values were found and removed before + fitting the Gaussian process model. + ! validation: internal: A correlation computation is required, but `estimate` is constant and ha... + ! For the rsq estimates, 9 missing values were found and removed before + fitting the Gaussian process model. + ! validation: internal: A correlation computation is required, but `estimate` is constant and ha... + ! No improvement for 10 iterations; returning current results. --- diff --git a/tests/testthat/_snaps/fit_best.md b/tests/testthat/_snaps/fit_best.md index 3b86c704..3e8e951e 100644 --- a/tests/testthat/_snaps/fit_best.md +++ b/tests/testthat/_snaps/fit_best.md @@ -4,8 +4,8 @@ fit_best(knn_pca_res, verbose = TRUE) Output Using rmse as the metric, the optimal parameters were: - neighbors: 10 - num_comp: 3 + neighbors: 1 + num_comp: 4 Message i Fitting using 161 data points... 
@@ -23,13 +23,13 @@ -- Model ----------------------------------------------------------------------- Call: - kknn::train.kknn(formula = ..y ~ ., data = data, ks = min_rows(10L, data, 5)) + kknn::train.kknn(formula = ..y ~ ., data = data, ks = min_rows(1L, data, 5)) Type of response variable: continuous - minimal mean absolute error: 1.690086 - Minimal mean squared error: 4.571625 + minimal mean absolute error: 1.015528 + Minimal mean squared error: 2.448261 Best kernel: optimal - Best k: 10 + Best k: 1 --- diff --git a/tests/testthat/_snaps/grid.md b/tests/testthat/_snaps/grid.md index 89cba326..d2682600 100644 --- a/tests/testthat/_snaps/grid.md +++ b/tests/testthat/_snaps/grid.md @@ -66,14 +66,14 @@ # A tibble: 10 x 4 splits id .metrics .notes - 1 Fold01 - 2 Fold02 - 3 Fold03 - 4 Fold04 - 5 Fold05 - 6 Fold06 - 7 Fold07 - 8 Fold08 - 9 Fold09 - 10 Fold10 + 1 Fold01 + 2 Fold02 + 3 Fold03 + 4 Fold04 + 5 Fold05 + 6 Fold06 + 7 Fold07 + 8 Fold08 + 9 Fold09 + 10 Fold10 diff --git a/tests/testthat/test-autoplot.R b/tests/testthat/test-autoplot.R index 9ab12985..a2ee7f62 100644 --- a/tests/testthat/test-autoplot.R +++ b/tests/testthat/test-autoplot.R @@ -329,12 +329,21 @@ test_that("plot_perf_vs_iter with fairness metrics (#773)", { test_that("regular grid plot", { skip_if_not_installed("ggplot2", minimum_version = "3.5.0") - set.seed(1) - res <- + + svm_spec <- parsnip::svm_rbf(cost = tune()) %>% parsnip::set_engine("kernlab") %>% - parsnip::set_mode("regression") %>% - tune_grid(mpg ~ ., resamples = rsample::vfold_cv(mtcars, v = 5), grid = 1) + parsnip::set_mode("regression") + + svm_grid <- + svm_spec %>% + extract_parameter_set_dials() %>% + dials::grid_regular(levels = 1) + + set.seed(1) + res <- + svm_spec %>% + tune_grid(mpg ~ ., resamples = rsample::vfold_cv(mtcars, v = 5), grid = svm_grid) expect_snapshot( error = TRUE, diff --git a/tests/testthat/test-extract.R b/tests/testthat/test-extract.R index a01d2287..291dc7ab 100644 --- a/tests/testthat/test-extract.R +++ 
b/tests/testthat/test-extract.R @@ -170,7 +170,7 @@ test_that("tune model and recipe", { grid_3 <- extract_parameter_set_dials(wflow_3) %>% update(num_comp = dials::num_comp(c(2, 5))) %>% - dials::grid_latin_hypercube(size = 4) + dials::grid_space_filling(size = 4) expect_error( res_3_1 <- tune_grid(