From 4584a9b3b86be70140c24037d43c2950ce44eca3 Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Wed, 22 May 2024 21:36:40 -0700 Subject: [PATCH 01/16] let fit_xy() take dgCMatrix input --- R/convert_data.R | 6 +++++- R/fit.R | 6 +++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/R/convert_data.R b/R/convert_data.R index 519aec9e3..2a573961a 100644 --- a/R/convert_data.R +++ b/R/convert_data.R @@ -374,7 +374,11 @@ maybe_matrix <- function(x) { "converted to numeric matrix: {non_num_cols}.") rlang::abort(msg) } - x <- as.matrix(x) + if (any(vapply(x, sparsevctrs::is_sparse_vector, logical(1)))) { + x <- sparsevctrs::coerce_to_sparse_matrix(x) + } else { + x <- as.matrix(x) + } } # leave alone if matrix or sparse matrix x diff --git a/R/fit.R b/R/fit.R index 288fc64a2..3553b96f4 100644 --- a/R/fit.R +++ b/R/fit.R @@ -275,6 +275,10 @@ fit_xy.model_spec <- } } + if (allow_sparse(object) && methods::is(x, "sparseMatrix")) { + x <- sparsevctrs::coerce_to_sparse_data_frame(x) + } + cl <- match.call(expand.dots = TRUE) eval_env <- rlang::env() eval_env$x <- x @@ -387,7 +391,7 @@ inher <- function(x, cls, cl) { check_interface <- function(formula, data, cl, model) { inher(formula, "formula", cl) - inher(data, c("data.frame", "tbl_spark"), cl) + inher(data, c("data.frame", "dgCMatrix", "tbl_spark"), cl) # Determine the `fit()` interface form_interface <- !is.null(formula) & !is.null(data) From d1d4df4271b49f183f78180f43fccfad8b9801dd Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Wed, 22 May 2024 21:43:24 -0700 Subject: [PATCH 02/16] only turn into sparse data frame if model allow sparse data --- R/fit.R | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/R/fit.R b/R/fit.R index 3553b96f4..1d115b7aa 100644 --- a/R/fit.R +++ b/R/fit.R @@ -275,8 +275,16 @@ fit_xy.model_spec <- } } - if (allow_sparse(object) && methods::is(x, "sparseMatrix")) { - x <- sparsevctrs::coerce_to_sparse_data_frame(x) + if (methods::is(x, "sparseMatrix")) { + if (allow_sparse(object)) { + x <- sparsevctrs::coerce_to_sparse_data_frame(x) + } else { + cli::cli_warn(c( + "!" = "{.arg x} is a sparse matrix, but model doesn't accept that.", + "i" = "Converted {.arg x} to data.frame." + )) + x <- as.data.frame(x) + } } cl <- match.call(expand.dots = TRUE) From bbe0732a9215d074611665de527088759235946d Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Wed, 22 May 2024 21:45:33 -0700 Subject: [PATCH 03/16] add sparsevctrs to imports --- DESCRIPTION | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index bfa80fed1..c605a7c8d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -32,6 +32,7 @@ Imports: prettyunits, purrr (>= 1.0.0), rlang (>= 1.1.0), + sparsevctrs (>= 0.1.0), stats, tibble (>= 2.1.1), tidyr (>= 1.3.0), @@ -78,5 +79,6 @@ Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) Remotes: - tidymodels/hardhat + tidymodels/hardhat, + r-lib/sparsevctrs RoxygenNote: 7.3.1 From 6e2fb0096b3f1eaf7ab6200cd4378e51a44fa11e Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Wed, 12 Jun 2024 22:04:42 -0700 Subject: [PATCH 04/16] delete failing test --- tests/testthat/test-rand_forest_ranger.R | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/tests/testthat/test-rand_forest_ranger.R b/tests/testthat/test-rand_forest_ranger.R index b94db7e7f..a40eb4780 100644 --- a/tests/testthat/test-rand_forest_ranger.R +++ b/tests/testthat/test-rand_forest_ranger.R @@ -475,16 +475,6 @@ test_that('ranger and sparse matrices', { expect_equal(extract_fit_engine(from_df), extract_fit_engine(from_mat)) expect_equal(extract_fit_engine(from_df), extract_fit_engine(from_sparse)) - - rf_spec <- - rand_forest(trees = 10) %>% - set_engine("randomForest", seed = 2) %>% - set_mode("regression") - expect_error( - rf_spec %>% fit_xy(mtcar_smat, mtcars$mpg), - "Sparse matrices not supported" - ) - }) From 44f2f8189ba52bfbf7407967c52adc0a73df3a2b Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Wed, 12 Jun 2024 22:04:59 -0700 Subject: [PATCH 05/16] add sparse_hotel_rates() helper function --- tests/testthat/helper-objects.R | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/tests/testthat/helper-objects.R b/tests/testthat/helper-objects.R index a9297a65a..b00c3c2c8 100644 --- a/tests/testthat/helper-objects.R +++ b/tests/testthat/helper-objects.R @@ -24,3 +24,30 @@ is_tf_ok <- function() { } res } + +# ------------------------------------------------------------------------------ +# For sparse tibble testing + +sparse_hotel_rates <- function() { + # 99.2 sparsity + hotel_rates <- modeldata::hotel_rates + + prefix_colnames <- function(x, prefix) { + colnames(x) <- paste(colnames(x), prefix, sep = "_") + x + } + + dummies_country <- hardhat::fct_encode_one_hot(hotel_rates$country) + dummies_company <- hardhat::fct_encode_one_hot(hotel_rates$company) + dummies_agent <- hardhat::fct_encode_one_hot(hotel_rates$agent) + + res <- dplyr::bind_cols( + hotel_rates["avg_price_per_room"], + prefix_colnames(dummies_country, "country"), + prefix_colnames(dummies_company, "company"), + prefix_colnames(dummies_agent, "agent") + ) + + res <- as.matrix(res) + Matrix::Matrix(res, sparse = TRUE) +} \ No newline at end of file From fb14d889f8915b033777609b47a604b87c8800c1 Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Wed, 12 Jun 2024 22:30:21 -0700 Subject: [PATCH 06/16] add methods to suggests --- DESCRIPTION | 1 + 1 file changed, 1 insertion(+) diff --git a/DESCRIPTION b/DESCRIPTION index c605a7c8d..984f37adb 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -53,6 +53,7 @@ Suggests: LiblineaR, MASS, Matrix, + methods, mgcv, modeldata, nlme, From 28420f6cb55f0c6bcd29bfe9c89bd9e809324683 Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Fri, 28 Jun 2024 18:30:18 -0700 Subject: [PATCH 07/16] refactor sparevctrs functions out --- R/convert_data.R | 15 ++++++++++----- R/fit.R | 12 +----------- R/sparsevctrs.R | 14 ++++++++++++++ 3 files changed, 25 insertions(+), 16 deletions(-) create mode 100644 R/sparsevctrs.R diff --git a/R/convert_data.R b/R/convert_data.R index 2a573961a..64c93e02c 100644 --- a/R/convert_data.R +++ b/R/convert_data.R @@ -374,16 +374,21 @@ maybe_matrix <- function(x) { "converted to numeric matrix: {non_num_cols}.") rlang::abort(msg) } - if (any(vapply(x, sparsevctrs::is_sparse_vector, logical(1)))) { - x <- sparsevctrs::coerce_to_sparse_matrix(x) - } else { - x <- as.matrix(x) - } + x <- maybe_sparse_matrix(x) } # leave alone if matrix or sparse matrix x } +maybe_sparse_matrix <- function(x) { + if (any(vapply(x, sparsevctrs::is_sparse_vector, logical(1)))) { + res <- sparsevctrs::coerce_to_sparse_matrix(x) + } else { + res <- as.matrix(x) + } + res +} + #' @rdname maybe_matrix #' @export maybe_data_frame <- function(x) { diff --git a/R/fit.R b/R/fit.R index 1d115b7aa..1345a95af 100644 --- a/R/fit.R +++ b/R/fit.R @@ -275,17 +275,7 @@ fit_xy.model_spec <- } } - if (methods::is(x, "sparseMatrix")) { - if (allow_sparse(object)) { - x <- sparsevctrs::coerce_to_sparse_data_frame(x) - } else { - cli::cli_warn(c( - "!" = "{.arg x} is a sparse matrix, but model doesn't accept that.", - "i" = "Converted {.arg x} to data.frame." - )) - x <- as.data.frame(x) - } - } + x <- to_sparse_data_frame(x, object) cl <- match.call(expand.dots = TRUE) eval_env <- rlang::env() diff --git a/R/sparsevctrs.R b/R/sparsevctrs.R new file mode 100644 index 000000000..107c3932e --- /dev/null +++ b/R/sparsevctrs.R @@ -0,0 +1,14 @@ +to_sparse_data_frame <- function(x, object) { + if (methods::is(x, "sparseMatrix")) { + if (allow_sparse(object)) { + x <- sparsevctrs::coerce_to_sparse_data_frame(x) + } else { + cli::cli_warn(c( + "!" = "{.arg x} is a sparse matrix, but model doesn't accept that.", + "i" = "Converted {.arg x} to data.frame." + )) + x <- as.data.frame(x) + } + } + x +} \ No newline at end of file From fddd5907dfce0087870d3463edd77327dcbaa8c3 Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Fri, 28 Jun 2024 18:30:25 -0700 Subject: [PATCH 08/16] test sparsevctrs functions in use --- tests/testthat/_snaps/sparsevctrs.md | 57 ++++++++++++++++++++ tests/testthat/test-sparsevctrs.R | 81 ++++++++++++++++++++++++++++ 2 files changed, 138 insertions(+) create mode 100644 tests/testthat/_snaps/sparsevctrs.md create mode 100644 tests/testthat/test-sparsevctrs.R diff --git a/tests/testthat/_snaps/sparsevctrs.md b/tests/testthat/_snaps/sparsevctrs.md new file mode 100644 index 000000000..484030f77 --- /dev/null +++ b/tests/testthat/_snaps/sparsevctrs.md @@ -0,0 +1,57 @@ +# to_sparse_data_frame() is used correctly + + Code + fit_xy(lm_spec, x = mtcars[, -1], y = mtcars[, 1]) + Condition + Error in `to_sparse_data_frame()`: + ! x is not sparse + +--- + + Code + fit_xy(lm_spec, x = hotel_data[, -1], y = hotel_data[, 1]) + Condition + Error in `to_sparse_data_frame()`: + ! x is spare, and sparse is not allowed + +--- + + Code + fit_xy(lm_spec, x = hotel_data[, -1], y = hotel_data[, 1]) + Condition + Error in `to_sparse_data_frame()`: + ! x is spare, and sparse is allowed + +# maybe_sparse_matrix() is used correctly + + Code + fit_xy(lm_spec, x = hotel_data[, -1], y = hotel_data[, 1]) + Condition + Error in `maybe_sparse_matrix()`: + ! sparse vectors detected + +--- + + Code + fit_xy(lm_spec, x = mtcars[, -1], y = mtcars[, 1]) + Condition + Error in `maybe_sparse_matrix()`: + ! no sparse vectors detected + +--- + + Code + fit_xy(lm_spec, x = as.data.frame(mtcars)[, -1], y = as.data.frame(mtcars)[, 1]) + Condition + Error in `maybe_sparse_matrix()`: + ! no sparse vectors detected + +--- + + Code + fit_xy(lm_spec, x = tibble::as_tibble(mtcars)[, -1], y = tibble::as_tibble( + mtcars)[, 1]) + Condition + Error in `maybe_sparse_matrix()`: + ! no sparse vectors detected + diff --git a/tests/testthat/test-sparsevctrs.R b/tests/testthat/test-sparsevctrs.R new file mode 100644 index 000000000..bcfadf738 --- /dev/null +++ b/tests/testthat/test-sparsevctrs.R @@ -0,0 +1,81 @@ +test_that("sparse matrices can be passed to `fit_xy()", { + hotel_data <- sparse_hotel_rates() + + lm_spec <- linear_reg(penalty = 0) %>% + set_engine("glmnet") + + expect_no_error( + lm_fit <- fit_xy(lm_spec, x = hotel_data[, -1], y = hotel_data[, 1]) + ) +}) + +test_that("to_sparse_data_frame() is used correctly", { + local_mocked_bindings( + to_sparse_data_frame = function(x, object) { + if (methods::is(x, "sparseMatrix")) { + if (allow_sparse(object)) { + stop("x is spare, and sparse is allowed") + } else { + stop("x is spare, and sparse is not allowed") + } + } + stop("x is not sparse") + } + ) + + hotel_data <- sparse_hotel_rates() + + lm_spec <- linear_reg(penalty = 0) %>% + set_engine("lm") + + expect_snapshot( + error = TRUE, + fit_xy(lm_spec, x = mtcars[, -1], y = mtcars[, 1]) + ) + expect_snapshot( + error = TRUE, + fit_xy(lm_spec, x = hotel_data[, -1], y = hotel_data[, 1]) + ) + + lm_spec <- linear_reg(penalty = 0) %>% + set_engine("glmnet") + + expect_snapshot( + error = TRUE, + fit_xy(lm_spec, x = hotel_data[, -1], y = hotel_data[, 1]) + ) +}) + +test_that("maybe_sparse_matrix() is used correctly", { + local_mocked_bindings( + maybe_sparse_matrix = function(x) { + if (any(vapply(x, sparsevctrs::is_sparse_vector, logical(1)))) { + stop("sparse vectors detected") + } else { + stop("no sparse vectors detected") + } + } + ) + + hotel_data <- sparse_hotel_rates() + + lm_spec <- linear_reg(penalty = 0) %>% + set_engine("glmnet") + + expect_snapshot( + error = TRUE, + fit_xy(lm_spec, x = hotel_data[, -1], y = hotel_data[, 1]) + ) + expect_snapshot( + error = TRUE, + fit_xy(lm_spec, x = mtcars[, -1], y = mtcars[, 1]) + ) + expect_snapshot( + error = TRUE, + fit_xy(lm_spec, x = as.data.frame(mtcars)[, -1], y = as.data.frame(mtcars)[, 1]) + ) + expect_snapshot( + error = TRUE, + fit_xy(lm_spec, x = tibble::as_tibble(mtcars)[, -1], y = tibble::as_tibble(mtcars)[, 1]) + ) +}) From 2b8f56e47bdf9a3768b99e96cfaf5adb3a6c506e Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Fri, 28 Jun 2024 19:43:20 -0700 Subject: [PATCH 09/16] don't use glmnet in sparsevctrs tests --- tests/testthat/_snaps/sparsevctrs.md | 16 ++++++------ tests/testthat/test-sparsevctrs.R | 39 +++++++++++++++++----------- 2 files changed, 32 insertions(+), 23 deletions(-) diff --git a/tests/testthat/_snaps/sparsevctrs.md b/tests/testthat/_snaps/sparsevctrs.md index 484030f77..abbf43efb 100644 --- a/tests/testthat/_snaps/sparsevctrs.md +++ b/tests/testthat/_snaps/sparsevctrs.md @@ -1,7 +1,7 @@ # to_sparse_data_frame() is used correctly Code - fit_xy(lm_spec, x = mtcars[, -1], y = mtcars[, 1]) + fit_xy(spec, x = mtcars[, -1], y = mtcars[, 1]) Condition Error in `to_sparse_data_frame()`: ! x is not sparse @@ -9,7 +9,7 @@ --- Code - fit_xy(lm_spec, x = hotel_data[, -1], y = hotel_data[, 1]) + fit_xy(spec, x = hotel_data[, -1], y = hotel_data[, 1]) Condition Error in `to_sparse_data_frame()`: ! x is spare, and sparse is not allowed @@ -17,7 +17,7 @@ --- Code - fit_xy(lm_spec, x = hotel_data[, -1], y = hotel_data[, 1]) + fit_xy(spec, x = hotel_data[, -1], y = hotel_data[, 1]) Condition Error in `to_sparse_data_frame()`: ! x is spare, and sparse is allowed @@ -25,7 +25,7 @@ # maybe_sparse_matrix() is used correctly Code - fit_xy(lm_spec, x = hotel_data[, -1], y = hotel_data[, 1]) + fit_xy(spec, x = hotel_data[, -1], y = hotel_data[, 1]) Condition Error in `maybe_sparse_matrix()`: ! sparse vectors detected @@ -33,7 +33,7 @@ --- Code - fit_xy(lm_spec, x = mtcars[, -1], y = mtcars[, 1]) + fit_xy(spec, x = mtcars[, -1], y = mtcars[, 1]) Condition Error in `maybe_sparse_matrix()`: ! no sparse vectors detected @@ -41,7 +41,7 @@ --- Code - fit_xy(lm_spec, x = as.data.frame(mtcars)[, -1], y = as.data.frame(mtcars)[, 1]) + fit_xy(spec, x = as.data.frame(mtcars)[, -1], y = as.data.frame(mtcars)[, 1]) Condition Error in `maybe_sparse_matrix()`: ! no sparse vectors detected @@ -49,8 +49,8 @@ --- Code - fit_xy(lm_spec, x = tibble::as_tibble(mtcars)[, -1], y = tibble::as_tibble( - mtcars)[, 1]) + fit_xy(spec, x = tibble::as_tibble(mtcars)[, -1], y = tibble::as_tibble(mtcars)[, + 1]) Condition Error in `maybe_sparse_matrix()`: ! no sparse vectors detected diff --git a/tests/testthat/test-sparsevctrs.R b/tests/testthat/test-sparsevctrs.R index bcfadf738..e5d5e0511 100644 --- a/tests/testthat/test-sparsevctrs.R +++ b/tests/testthat/test-sparsevctrs.R @@ -1,15 +1,20 @@ test_that("sparse matrices can be passed to `fit_xy()", { + skip_if_not_installed("LiblineaR") + hotel_data <- sparse_hotel_rates() - lm_spec <- linear_reg(penalty = 0) %>% - set_engine("glmnet") + spec <- svm_linear() %>% + set_mode("regression") %>% + set_engine("LiblineaR") expect_no_error( - lm_fit <- fit_xy(lm_spec, x = hotel_data[, -1], y = hotel_data[, 1]) + lm_fit <- fit_xy(spec, x = hotel_data[, -1], y = hotel_data[, 1]) ) }) test_that("to_sparse_data_frame() is used correctly", { + skip_if_not_installed("LiblineaR") + local_mocked_bindings( to_sparse_data_frame = function(x, object) { if (methods::is(x, "sparseMatrix")) { @@ -25,28 +30,31 @@ test_that("to_sparse_data_frame() is used correctly", { hotel_data <- sparse_hotel_rates() - lm_spec <- linear_reg(penalty = 0) %>% + spec <- linear_reg() %>% set_engine("lm") expect_snapshot( error = TRUE, - fit_xy(lm_spec, x = mtcars[, -1], y = mtcars[, 1]) + fit_xy(spec, x = mtcars[, -1], y = mtcars[, 1]) ) expect_snapshot( error = TRUE, - fit_xy(lm_spec, x = hotel_data[, -1], y = hotel_data[, 1]) + fit_xy(spec, x = hotel_data[, -1], y = hotel_data[, 1]) ) - lm_spec <- linear_reg(penalty = 0) %>% - set_engine("glmnet") + spec <- svm_linear() %>% + set_mode("regression") %>% + set_engine("LiblineaR") expect_snapshot( error = TRUE, - fit_xy(lm_spec, x = hotel_data[, -1], y = hotel_data[, 1]) + fit_xy(spec, x = hotel_data[, -1], y = hotel_data[, 1]) ) }) test_that("maybe_sparse_matrix() is used correctly", { + skip_if_not_installed("LiblineaR") + local_mocked_bindings( maybe_sparse_matrix = function(x) { if (any(vapply(x, sparsevctrs::is_sparse_vector, logical(1)))) { @@ -59,23 +67,24 @@ test_that("maybe_sparse_matrix() is used correctly", { hotel_data <- sparse_hotel_rates() - lm_spec <- linear_reg(penalty = 0) %>% - set_engine("glmnet") + spec <- svm_linear() %>% + set_mode("regression") %>% + set_engine("LiblineaR") expect_snapshot( error = TRUE, - fit_xy(lm_spec, x = hotel_data[, -1], y = hotel_data[, 1]) + fit_xy(spec, x = hotel_data[, -1], y = hotel_data[, 1]) ) expect_snapshot( error = TRUE, - fit_xy(lm_spec, x = mtcars[, -1], y = mtcars[, 1]) + fit_xy(spec, x = mtcars[, -1], y = mtcars[, 1]) ) expect_snapshot( error = TRUE, - fit_xy(lm_spec, x = as.data.frame(mtcars)[, -1], y = as.data.frame(mtcars)[, 1]) + fit_xy(spec, x = as.data.frame(mtcars)[, -1], y = as.data.frame(mtcars)[, 1]) ) expect_snapshot( error = TRUE, - fit_xy(lm_spec, x = tibble::as_tibble(mtcars)[, -1], y = tibble::as_tibble(mtcars)[, 1]) + fit_xy(spec, x = tibble::as_tibble(mtcars)[, -1], y = tibble::as_tibble(mtcars)[, 1]) ) }) From ae90e84365697c391f550988c190b745a5e9a771 Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Fri, 28 Jun 2024 19:47:24 -0700 Subject: [PATCH 10/16] set minimum sparsevctrs version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 984f37adb..3f4587d32 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -32,7 +32,7 @@ Imports: prettyunits, purrr (>= 1.0.0), rlang (>= 1.1.0), - sparsevctrs (>= 0.1.0), + sparsevctrs (>= 0.1.0.9000), stats, tibble (>= 2.1.1), tidyr (>= 1.3.0), From 124f925b928ebf7b687c33e17e700fdfca17f5fd Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Sat, 29 Jun 2024 15:39:30 -0700 Subject: [PATCH 11/16] trigger warning in to_sparse_data_frame() --- R/sparsevctrs.R | 2 +- tests/testthat/_snaps/sparsevctrs.md | 9 +++++++++ tests/testthat/test-sparsevctrs.R | 8 ++++++++ 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/R/sparsevctrs.R b/R/sparsevctrs.R index 107c3932e..58d866396 100644 --- a/R/sparsevctrs.R +++ b/R/sparsevctrs.R @@ -7,7 +7,7 @@ to_sparse_data_frame <- function(x, object) { "!" = "{.arg x} is a sparse matrix, but model doesn't accept that.", "i" = "Converted {.arg x} to data.frame." )) - x <- as.data.frame(x) + x <- as.data.frame(as.matrix(x)) } } x diff --git a/tests/testthat/_snaps/sparsevctrs.md b/tests/testthat/_snaps/sparsevctrs.md index abbf43efb..100e33895 100644 --- a/tests/testthat/_snaps/sparsevctrs.md +++ b/tests/testthat/_snaps/sparsevctrs.md @@ -1,3 +1,12 @@ +# sparse matrices can be passed to `fit_xy() + + Code + lm_fit <- fit_xy(spec, x = hotel_data[1:100, -1], y = hotel_data[1:100, 1]) + Condition + Warning: + ! `x` is a sparse matrix, but model doesn't accept that. + i Converted `x` to data.frame. + # to_sparse_data_frame() is used correctly Code diff --git a/tests/testthat/test-sparsevctrs.R b/tests/testthat/test-sparsevctrs.R index e5d5e0511..713cde557 100644 --- a/tests/testthat/test-sparsevctrs.R +++ b/tests/testthat/test-sparsevctrs.R @@ -10,6 +10,14 @@ test_that("sparse matrices can be passed to `fit_xy()", { expect_no_error( lm_fit <- fit_xy(spec, x = hotel_data[, -1], y = hotel_data[, 1]) ) + + spec <- linear_reg() %>% + set_mode("regression") %>% + set_engine("lm") + + expect_snapshot( + lm_fit <- fit_xy(spec, x = hotel_data[1:100, -1], y = hotel_data[1:100, 1]) + ) }) test_that("to_sparse_data_frame() is used correctly", { From 7718d679d3aa240f8a6abb4d948288581184caf6 Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Tue, 27 Aug 2024 16:45:39 -0700 Subject: [PATCH 12/16] document a little more for sparse matrix input --- NEWS.md | 1 + R/fit.R | 5 +++++ man/fit.Rd | 4 ++++ tests/testthat/helper-objects.R | 2 +- 4 files changed, 11 insertions(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index c51afb0e7..b5309b4f1 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,6 @@ # parsnip (development version) +* `fit_xy()` can now take dgCMatrix input for `x` argument (#1121). * `fit_xy()` currently raises an error for `gen_additive_mod()` model specifications as the default engine (`"mgcv"`) specifies smoothing terms in model formulas. However, some engines specify smooths via additional arguments, in which case the restriction on `fit_xy()` is excessive. parsnip will now only raise an error when fitting a `gen_additive_mod()` with `fit_xy()` when using the `"mgcv"` engine (#775). diff --git a/R/fit.R b/R/fit.R index 58c06d4ac..0cfe7f5b0 100644 --- a/R/fit.R +++ b/R/fit.R @@ -55,6 +55,11 @@ #' a "reverse Kaplan-Meier" curve that models the probability of censoring. This #' may be used later to compute inverse probability censoring weights for #' performance measures. +#' +#' Sparse data is supported, with the use of the `x` argument in `fit_xy()`. See +#' `allow_sparse_x` column of [parsnip::get_encoding()] for sparse input +#' compatibility. +#' #' @examplesIf !parsnip:::is_cran_check() #' # Although `glm()` only has a formula interface, different #' # methods for specifying the model can be used diff --git a/man/fit.Rd b/man/fit.Rd index b4c16f278..332d993bc 100644 --- a/man/fit.Rd +++ b/man/fit.Rd @@ -103,6 +103,10 @@ executed and saved in the parsnip object. The \code{censor_probs} element contai a "reverse Kaplan-Meier" curve that models the probability of censoring. This may be used later to compute inverse probability censoring weights for performance measures. + +Sparse data is supported, with the use of the \code{x} argument in \code{fit_xy()}. See +\code{allow_sparse_x} column of \code{\link[=get_encoding]{get_encoding()}} for sparse input +compatibility. } \examples{ \dontshow{if (!parsnip:::is_cran_check()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} diff --git a/tests/testthat/helper-objects.R b/tests/testthat/helper-objects.R index b00c3c2c8..f2e24a353 100644 --- a/tests/testthat/helper-objects.R +++ b/tests/testthat/helper-objects.R @@ -50,4 +50,4 @@ sparse_hotel_rates <- function() { res <- as.matrix(res) Matrix::Matrix(res, sparse = TRUE) -} \ No newline at end of file +} From f2faed9778428262d421afd1c51661ace39ba302 Mon Sep 17 00:00:00 2001 From: topepo Date: Wed, 28 Aug 2024 10:10:37 -0400 Subject: [PATCH 13/16] convert warning to error --- R/sparsevctrs.R | 10 ++++------ tests/testthat/_snaps/sparsevctrs.md | 5 ++--- tests/testthat/test-sparsevctrs.R | 9 +++++---- 3 files changed, 11 insertions(+), 13 deletions(-) diff --git a/R/sparsevctrs.R b/R/sparsevctrs.R index 58d866396..a7a65cf45 100644 --- a/R/sparsevctrs.R +++ b/R/sparsevctrs.R @@ -3,12 +3,10 @@ to_sparse_data_frame <- function(x, object) { if (allow_sparse(object)) { x <- sparsevctrs::coerce_to_sparse_data_frame(x) } else { - cli::cli_warn(c( - "!" = "{.arg x} is a sparse matrix, but model doesn't accept that.", - "i" = "Converted {.arg x} to data.frame." - )) - x <- as.data.frame(as.matrix(x)) + cli::cli_abort( + "{.arg x} is a sparse matrix, but {.fn {class(object)[1]}} with + engine {.code {object$engine}} doesn't accept that.") } } x -} \ No newline at end of file +} diff --git a/tests/testthat/_snaps/sparsevctrs.md b/tests/testthat/_snaps/sparsevctrs.md index 100e33895..02cb9611b 100644 --- a/tests/testthat/_snaps/sparsevctrs.md +++ b/tests/testthat/_snaps/sparsevctrs.md @@ -3,9 +3,8 @@ Code lm_fit <- fit_xy(spec, x = hotel_data[1:100, -1], y = hotel_data[1:100, 1]) Condition - Warning: - ! `x` is a sparse matrix, but model doesn't accept that. - i Converted `x` to data.frame. + Error in `to_sparse_data_frame()`: + ! `x` is a sparse matrix, but `linear_reg()` with engine `lm` doesn't accept that. # to_sparse_data_frame() is used correctly diff --git a/tests/testthat/test-sparsevctrs.R b/tests/testthat/test-sparsevctrs.R index 713cde557..7e6a904bb 100644 --- a/tests/testthat/test-sparsevctrs.R +++ b/tests/testthat/test-sparsevctrs.R @@ -16,13 +16,14 @@ test_that("sparse matrices can be passed to `fit_xy()", { set_engine("lm") expect_snapshot( - lm_fit <- fit_xy(spec, x = hotel_data[1:100, -1], y = hotel_data[1:100, 1]) + lm_fit <- fit_xy(spec, x = hotel_data[1:100, -1], y = hotel_data[1:100, 1]), + error = TRUE ) }) test_that("to_sparse_data_frame() is used correctly", { skip_if_not_installed("LiblineaR") - + local_mocked_bindings( to_sparse_data_frame = function(x, object) { if (methods::is(x, "sparseMatrix")) { @@ -49,7 +50,7 @@ test_that("to_sparse_data_frame() is used correctly", { error = TRUE, fit_xy(spec, x = hotel_data[, -1], y = hotel_data[, 1]) ) - + spec <- svm_linear() %>% set_mode("regression") %>% set_engine("LiblineaR") @@ -62,7 +63,7 @@ test_that("to_sparse_data_frame() is used correctly", { test_that("maybe_sparse_matrix() is used correctly", { skip_if_not_installed("LiblineaR") - + local_mocked_bindings( maybe_sparse_matrix = function(x) { if (any(vapply(x, sparsevctrs::is_sparse_vector, logical(1)))) { From 616b548a6bbbedd4ece0b0e8213bb3684ae41400 Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Wed, 28 Aug 2024 09:11:38 -0700 Subject: [PATCH 14/16] use less noisy model/engine for sparsevctrs tests --- tests/testthat/test-sparsevctrs.R | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/testthat/test-sparsevctrs.R b/tests/testthat/test-sparsevctrs.R index 713cde557..8f112eb78 100644 --- a/tests/testthat/test-sparsevctrs.R +++ b/tests/testthat/test-sparsevctrs.R @@ -1,11 +1,11 @@ test_that("sparse matrices can be passed to `fit_xy()", { - skip_if_not_installed("LiblineaR") + skip_if_not_installed("xgboost") hotel_data <- sparse_hotel_rates() - spec <- svm_linear() %>% + spec <- boost_tree() %>% set_mode("regression") %>% - set_engine("LiblineaR") + set_engine("xgboost") expect_no_error( lm_fit <- fit_xy(spec, x = hotel_data[, -1], y = hotel_data[, 1]) @@ -21,7 +21,7 @@ test_that("sparse matrices can be passed to `fit_xy()", { }) test_that("to_sparse_data_frame() is used correctly", { - skip_if_not_installed("LiblineaR") + skip_if_not_installed("xgboost") local_mocked_bindings( to_sparse_data_frame = function(x, object) { @@ -50,9 +50,9 @@ test_that("to_sparse_data_frame() is used correctly", { fit_xy(spec, x = hotel_data[, -1], y = hotel_data[, 1]) ) - spec <- svm_linear() %>% + spec <- boost_tree() %>% set_mode("regression") %>% - set_engine("LiblineaR") + set_engine("xgboost") expect_snapshot( error = TRUE, @@ -61,7 +61,7 @@ test_that("to_sparse_data_frame() is used correctly", { }) test_that("maybe_sparse_matrix() is used correctly", { - skip_if_not_installed("LiblineaR") + skip_if_not_installed("xgboost") local_mocked_bindings( maybe_sparse_matrix = function(x) { @@ -75,9 +75,9 @@ test_that("maybe_sparse_matrix() is used correctly", { hotel_data <- sparse_hotel_rates() - spec <- svm_linear() %>% + spec <- boost_tree() %>% set_mode("regression") %>% - set_engine("LiblineaR") + set_engine("xgboost") expect_snapshot( error = TRUE, From a60a928936fa42b6407c0057778a4edc05b5dae0 Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Wed, 28 Aug 2024 09:14:00 -0700 Subject: [PATCH 15/16] remove leftover skip_if_not_installed --- tests/testthat/test-sparsevctrs.R | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tests/testthat/test-sparsevctrs.R b/tests/testthat/test-sparsevctrs.R index 84b0d0e39..04f673c1c 100644 --- a/tests/testthat/test-sparsevctrs.R +++ b/tests/testthat/test-sparsevctrs.R @@ -24,8 +24,6 @@ test_that("sparse matrices can be passed to `fit_xy()", { test_that("to_sparse_data_frame() is used correctly", { skip_if_not_installed("xgboost") - skip_if_not_installed("LiblineaR") - local_mocked_bindings( to_sparse_data_frame = function(x, object) { if (methods::is(x, "sparseMatrix")) { @@ -68,8 +66,6 @@ test_that("to_sparse_data_frame() is used correctly", { test_that("maybe_sparse_matrix() is used correctly", { skip_if_not_installed("xgboost") - skip_if_not_installed("LiblineaR") - local_mocked_bindings( maybe_sparse_matrix = function(x) { if (any(vapply(x, sparsevctrs::is_sparse_vector, logical(1)))) { From 1971f05b7c086cf9c117f15094483fd48172791d Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Wed, 28 Aug 2024 10:03:10 -0700 Subject: [PATCH 16/16] fix merge issue --- tests/testthat/test-sparsevctrs.R | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/testthat/test-sparsevctrs.R b/tests/testthat/test-sparsevctrs.R index 04f673c1c..2f9027306 100644 --- a/tests/testthat/test-sparsevctrs.R +++ b/tests/testthat/test-sparsevctrs.R @@ -52,8 +52,6 @@ test_that("to_sparse_data_frame() is used correctly", { ) spec <- boost_tree() %>% - - spec <- svm_linear() %>% set_mode("regression") %>% set_engine("xgboost")