diff --git a/R/sparsevctrs.R b/R/sparsevctrs.R index 73b6b6443..7e7c2129b 100644 --- a/R/sparsevctrs.R +++ b/R/sparsevctrs.R @@ -1,3 +1,21 @@ +#' Using sparse data with parsnip +#' +#' You can figure out whether a given model engine supports sparse data by +#' calling `get_encoding("name of model")` and looking at the `allow_sparse_x` +#' column. +#' +#' Using sparse data for model fitting and prediction shouldn't require any +#' additional configurations. Just pass in a sparse matrix such as dgCMatrix +#' from the `Matrix` package or a sparse tibble from the sparsevctrs package +#' to the data argument of [fit()], [fit_xy()], and [predict()]. +#' +#' Models that don't support sparse data will try to convert to non-sparse data +#' with warnings. If conversion isn't possible, an informative error will be +#' thrown. +#' +#' @name sparse_data +NULL + to_sparse_data_frame <- function(x, object, call = rlang::caller_env()) { if (is_sparse_matrix(x)) { if (allow_sparse(object)) { diff --git a/_pkgdown.yml b/_pkgdown.yml index 78e5d56cc..e6868a7a3 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -90,6 +90,7 @@ reference: - set_engine - set_mode - show_engines + - sparse_data - tidy.model_fit - translate - starts_with("update") diff --git a/man/details_boost_tree_xgboost.Rd b/man/details_boost_tree_xgboost.Rd index dd8bab2de..721da3d97 100644 --- a/man/details_boost_tree_xgboost.Rd +++ b/man/details_boost_tree_xgboost.Rd @@ -131,6 +131,14 @@ The \code{fit()} and \code{fit_xy()} arguments have arguments called \code{case_weights} that expect vectors of case weights. } +\subsection{Sparse Data}{ + +This model can utilize sparse data during model fitting and prediction. +Both sparse matrices such as dgCMatrix from the \code{Matrix} package and +sparse tibbles from the \code{sparsevctrs} package are supported. See +\link{sparse_data} for more information. 
+} + \subsection{Other details}{ \subsection{Interfacing with the \code{params} argument}{ diff --git a/man/details_linear_reg_glmnet.Rd b/man/details_linear_reg_glmnet.Rd index 3092e1d63..f7a7a3605 100644 --- a/man/details_linear_reg_glmnet.Rd +++ b/man/details_linear_reg_glmnet.Rd @@ -68,6 +68,14 @@ The \code{fit()} and \code{fit_xy()} arguments have arguments called \code{case_weights} that expect vectors of case weights. } +\subsection{Sparse Data}{ + +This model can utilize sparse data during model fitting and prediction. +Both sparse matrices such as dgCMatrix from the \code{Matrix} package and +sparse tibbles from the \code{sparsevctrs} package are supported. See +\link{sparse_data} for more information. +} + \subsection{Saving fitted model objects}{ This model object contains data that are not required to make diff --git a/man/details_logistic_reg_LiblineaR.Rd b/man/details_logistic_reg_LiblineaR.Rd index 5aeaa83d7..4de369036 100644 --- a/man/details_logistic_reg_LiblineaR.Rd +++ b/man/details_logistic_reg_LiblineaR.Rd @@ -61,6 +61,14 @@ center and scale each so that each predictor has mean zero and a variance of one. } +\subsection{Sparse Data}{ + +This model can utilize sparse data during model fitting and prediction. +Both sparse matrices such as dgCMatrix from the \code{Matrix} package and +sparse tibbles from the \code{sparsevctrs} package are supported. See +\link{sparse_data} for more information. +} + \subsection{Examples}{ The “Fitting and Predicting with parsnip” article contains diff --git a/man/details_logistic_reg_glmnet.Rd b/man/details_logistic_reg_glmnet.Rd index 2b13c3698..a71cf1f4f 100644 --- a/man/details_logistic_reg_glmnet.Rd +++ b/man/details_logistic_reg_glmnet.Rd @@ -73,6 +73,14 @@ The \code{fit()} and \code{fit_xy()} arguments have arguments called \code{case_weights} that expect vectors of case weights. } +\subsection{Sparse Data}{ + +This model can utilize sparse data during model fitting and prediction. 
+Both sparse matrices such as dgCMatrix from the \code{Matrix} package and +sparse tibbles from the \code{sparsevctrs} package are supported. See +\link{sparse_data} for more information. +} + \subsection{Saving fitted model objects}{ This model object contains data that are not required to make diff --git a/man/details_multinom_reg_glmnet.Rd b/man/details_multinom_reg_glmnet.Rd index ae6043c31..05a6416f7 100644 --- a/man/details_multinom_reg_glmnet.Rd +++ b/man/details_multinom_reg_glmnet.Rd @@ -79,6 +79,14 @@ The \code{fit()} and \code{fit_xy()} arguments have arguments called \code{case_weights} that expect vectors of case weights. } +\subsection{Sparse Data}{ + +This model can utilize sparse data during model fitting and prediction. +Both sparse matrices such as dgCMatrix from the \code{Matrix} package and +sparse tibbles from the \code{sparsevctrs} package are supported. See +\link{sparse_data} for more information. +} + \subsection{Saving fitted model objects}{ This model object contains data that are not required to make diff --git a/man/details_rand_forest_ranger.Rd b/man/details_rand_forest_ranger.Rd index b7bb1f813..1253d39fc 100644 --- a/man/details_rand_forest_ranger.Rd +++ b/man/details_rand_forest_ranger.Rd @@ -123,6 +123,14 @@ The \code{fit()} and \code{fit_xy()} arguments have arguments called \code{case_weights} that expect vectors of case weights. } +\subsection{Sparse Data}{ + +This model can utilize sparse data during model fitting and prediction. +Both sparse matrices such as dgCMatrix from the \code{Matrix} package and +sparse tibbles from the \code{sparsevctrs} package are supported. See +\link{sparse_data} for more information. 
+} + \subsection{Saving fitted model objects}{ This model object contains data that are not required to make diff --git a/man/details_svm_linear_LiblineaR.Rd b/man/details_svm_linear_LiblineaR.Rd index ac1f786c1..b52638165 100644 --- a/man/details_svm_linear_LiblineaR.Rd +++ b/man/details_svm_linear_LiblineaR.Rd @@ -97,6 +97,14 @@ variance of one. The underlying model implementation does not allow for case weights. } +\subsection{Sparse Data}{ + +This model can utilize sparse data during model fitting and prediction. +Both sparse matrices such as dgCMatrix from the \code{Matrix} package and +sparse tibbles from the \code{sparsevctrs} package are supported. See +\link{sparse_data} for more information. +} + \subsection{Examples}{ The “Fitting and Predicting with parsnip” article contains diff --git a/man/rmd/boost_tree_xgboost.Rmd b/man/rmd/boost_tree_xgboost.Rmd index 7aaacd97e..0049593de 100644 --- a/man/rmd/boost_tree_xgboost.Rmd +++ b/man/rmd/boost_tree_xgboost.Rmd @@ -65,6 +65,11 @@ For classification, non-numeric outcomes (i.e., factors) are internally converte ```{r child = "template-uses-case-weights.Rmd"} ``` +## Sparse Data + +```{r child = "template-uses-sparse-data.Rmd"} +``` + ## Other details ### Interfacing with the `params` argument diff --git a/man/rmd/boost_tree_xgboost.md b/man/rmd/boost_tree_xgboost.md index dd065e73b..5ad594062 100644 --- a/man/rmd/boost_tree_xgboost.md +++ b/man/rmd/boost_tree_xgboost.md @@ -116,6 +116,11 @@ This model can utilize case weights during model fitting. To use them, see the d The `fit()` and `fit_xy()` arguments have arguments called `case_weights` that expect vectors of case weights. +## Sparse Data + + +This model can utilize sparse data during model fitting and prediction. Both sparse matrices such as dgCMatrix from the `Matrix` package and sparse tibbles from the `sparsevctrs` package are supported. See [sparse_data] for more information. 
+ ## Other details ### Interfacing with the `params` argument diff --git a/man/rmd/linear_reg_glmnet.Rmd b/man/rmd/linear_reg_glmnet.Rmd index bf92c0cc1..0177387ce 100644 --- a/man/rmd/linear_reg_glmnet.Rmd +++ b/man/rmd/linear_reg_glmnet.Rmd @@ -48,6 +48,11 @@ By default, [glmnet::glmnet()] uses the argument `standardize = TRUE` to center ```{r child = "template-uses-case-weights.Rmd"} ``` +## Sparse Data + +```{r child = "template-uses-sparse-data.Rmd"} +``` + ## Saving fitted model objects ```{r child = "template-butcher.Rmd"} diff --git a/man/rmd/linear_reg_glmnet.md b/man/rmd/linear_reg_glmnet.md index 3f69fa00c..b2f74d885 100644 --- a/man/rmd/linear_reg_glmnet.md +++ b/man/rmd/linear_reg_glmnet.md @@ -57,6 +57,11 @@ This model can utilize case weights during model fitting. To use them, see the d The `fit()` and `fit_xy()` arguments have arguments called `case_weights` that expect vectors of case weights. +## Sparse Data + + +This model can utilize sparse data during model fitting and prediction. Both sparse matrices such as dgCMatrix from the `Matrix` package and sparse tibbles from the `sparsevctrs` package are supported. See [sparse_data] for more information. + ## Saving fitted model objects diff --git a/man/rmd/logistic_reg_LiblineaR.Rmd b/man/rmd/logistic_reg_LiblineaR.Rmd index f2bdfcfa9..a69099494 100644 --- a/man/rmd/logistic_reg_LiblineaR.Rmd +++ b/man/rmd/logistic_reg_LiblineaR.Rmd @@ -42,6 +42,11 @@ logistic_reg(penalty = double(1), mixture = double(1)) %>% ```{r child = "template-same-scale.Rmd"} ``` +## Sparse Data + +```{r child = "template-uses-sparse-data.Rmd"} +``` + ## Examples The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#logistic-reg-LiblineaR) for `logistic_reg()` with the `"LiblineaR"` engine. 
diff --git a/man/rmd/logistic_reg_LiblineaR.md b/man/rmd/logistic_reg_LiblineaR.md index 761092a85..6da4a8430 100644 --- a/man/rmd/logistic_reg_LiblineaR.md +++ b/man/rmd/logistic_reg_LiblineaR.md @@ -49,6 +49,11 @@ Factor/categorical predictors need to be converted to numeric values (e.g., dumm Predictors should have the same scale. One way to achieve this is to center and scale each so that each predictor has mean zero and a variance of one. +## Sparse Data + + +This model can utilize sparse data during model fitting and prediction. Both sparse matrices such as dgCMatrix from the `Matrix` package and sparse tibbles from the `sparsevctrs` package are supported. See [sparse_data] for more information. + ## Examples The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#logistic-reg-LiblineaR) for `logistic_reg()` with the `"LiblineaR"` engine. diff --git a/man/rmd/logistic_reg_glmnet.Rmd b/man/rmd/logistic_reg_glmnet.Rmd index 1b433b413..52d708af4 100644 --- a/man/rmd/logistic_reg_glmnet.Rmd +++ b/man/rmd/logistic_reg_glmnet.Rmd @@ -50,6 +50,11 @@ By default, [glmnet::glmnet()] uses the argument `standardize = TRUE` to center ```{r child = "template-uses-case-weights.Rmd"} ``` +## Sparse Data + +```{r child = "template-uses-sparse-data.Rmd"} +``` + ## Saving fitted model objects ```{r child = "template-butcher.Rmd"} diff --git a/man/rmd/logistic_reg_glmnet.md b/man/rmd/logistic_reg_glmnet.md index 61fc5bbee..d4c19eff0 100644 --- a/man/rmd/logistic_reg_glmnet.md +++ b/man/rmd/logistic_reg_glmnet.md @@ -59,6 +59,11 @@ This model can utilize case weights during model fitting. To use them, see the d The `fit()` and `fit_xy()` arguments have arguments called `case_weights` that expect vectors of case weights. +## Sparse Data + + +This model can utilize sparse data during model fitting and prediction. 
Both sparse matrices such as dgCMatrix from the `Matrix` package and sparse tibbles from the `sparsevctrs` package are supported. See [sparse_data] for more information. + ## Saving fitted model objects diff --git a/man/rmd/multinom_reg_glmnet.Rmd b/man/rmd/multinom_reg_glmnet.Rmd index 514bc32fa..230031967 100644 --- a/man/rmd/multinom_reg_glmnet.Rmd +++ b/man/rmd/multinom_reg_glmnet.Rmd @@ -54,6 +54,11 @@ The "Fitting and Predicting with parsnip" article contains [examples](https://pa ```{r child = "template-uses-case-weights.Rmd"} ``` +## Sparse Data + +```{r child = "template-uses-sparse-data.Rmd"} +``` + ## Saving fitted model objects ```{r child = "template-butcher.Rmd"} diff --git a/man/rmd/multinom_reg_glmnet.md b/man/rmd/multinom_reg_glmnet.md index da4d291c2..1914e0860 100644 --- a/man/rmd/multinom_reg_glmnet.md +++ b/man/rmd/multinom_reg_glmnet.md @@ -63,6 +63,11 @@ This model can utilize case weights during model fitting. To use them, see the d The `fit()` and `fit_xy()` arguments have arguments called `case_weights` that expect vectors of case weights. +## Sparse Data + + +This model can utilize sparse data during model fitting and prediction. Both sparse matrices such as dgCMatrix from the `Matrix` package and sparse tibbles from the `sparsevctrs` package are supported. See [sparse_data] for more information. 
+ ## Saving fitted model objects diff --git a/man/rmd/rand_forest_ranger.Rmd b/man/rmd/rand_forest_ranger.Rmd index 45d22e83a..e7dfcde09 100644 --- a/man/rmd/rand_forest_ranger.Rmd +++ b/man/rmd/rand_forest_ranger.Rmd @@ -72,6 +72,11 @@ For `ranger` confidence intervals, the intervals are constructed using the form ```{r child = "template-uses-case-weights.Rmd"} ``` +## Sparse Data + +```{r child = "template-uses-sparse-data.Rmd"} +``` + ## Saving fitted model objects ```{r child = "template-butcher.Rmd"} diff --git a/man/rmd/rand_forest_ranger.md b/man/rmd/rand_forest_ranger.md index 3af03422d..c4e20f1d1 100644 --- a/man/rmd/rand_forest_ranger.md +++ b/man/rmd/rand_forest_ranger.md @@ -103,6 +103,11 @@ This model can utilize case weights during model fitting. To use them, see the d The `fit()` and `fit_xy()` arguments have arguments called `case_weights` that expect vectors of case weights. +## Sparse Data + + +This model can utilize sparse data during model fitting and prediction. Both sparse matrices such as dgCMatrix from the `Matrix` package and sparse tibbles from the `sparsevctrs` package are supported. See [sparse_data] for more information. + ## Saving fitted model objects diff --git a/man/rmd/svm_linear_LiblineaR.Rmd b/man/rmd/svm_linear_LiblineaR.Rmd index 53dea98c1..36ba5ed8e 100644 --- a/man/rmd/svm_linear_LiblineaR.Rmd +++ b/man/rmd/svm_linear_LiblineaR.Rmd @@ -66,6 +66,11 @@ Note that the `LiblineaR` engine does not produce class probabilities. When opti ```{r child = "template-no-case-weights.Rmd"} ``` +## Sparse Data + +```{r child = "template-uses-sparse-data.Rmd"} +``` + ## Examples The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#svm-linear-LiblineaR) for `svm_linear()` with the `"LiblineaR"` engine. 
diff --git a/man/rmd/svm_linear_LiblineaR.md b/man/rmd/svm_linear_LiblineaR.md index e2d9b2e1c..72ff0f300 100644 --- a/man/rmd/svm_linear_LiblineaR.md +++ b/man/rmd/svm_linear_LiblineaR.md @@ -85,6 +85,11 @@ scale each so that each predictor has mean zero and a variance of one. The underlying model implementation does not allow for case weights. +## Sparse Data + + +This model can utilize sparse data during model fitting and prediction. Both sparse matrices such as dgCMatrix from the `Matrix` package and sparse tibbles from the `sparsevctrs` package are supported. See [sparse_data] for more information. + ## Examples The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#svm-linear-LiblineaR) for `svm_linear()` with the `"LiblineaR"` engine. diff --git a/man/rmd/template-uses-sparse-data.Rmd b/man/rmd/template-uses-sparse-data.Rmd new file mode 100644 index 000000000..2b153cfa4 --- /dev/null +++ b/man/rmd/template-uses-sparse-data.Rmd @@ -0,0 +1 @@ +This model can utilize sparse data during model fitting and prediction. Both sparse matrices such as dgCMatrix from the `Matrix` package and sparse tibbles from the `sparsevctrs` package are supported. See [sparse_data] for more information. \ No newline at end of file diff --git a/man/sparse_data.Rd b/man/sparse_data.Rd new file mode 100644 index 000000000..f58dc8e7d --- /dev/null +++ b/man/sparse_data.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sparsevctrs.R +\name{sparse_data} +\alias{sparse_data} +\title{Using sparse data with parsnip} +\description{ +You can figure out whether a given model engine supports sparse data by +calling \code{get_encoding("name of model")} and looking at the \code{allow_sparse_x} +column. +} +\details{ +Using sparse data for model fitting and prediction shouldn't require any +additional configurations. 
Just pass in a sparse matrix such as dgCMatrix +from the \code{Matrix} package or a sparse tibble from the sparsevctrs package +to the data argument of \code{\link[=fit]{fit()}}, \code{\link[=fit_xy]{fit_xy()}}, and \code{\link[=predict]{predict()}}. + +Models that don't support sparse data will try to convert to non-sparse data +with warnings. If conversion isn't possible, an informative error will be +thrown. +}