tidymodels · topepo · Oct 11, 2024 · Aug 5, 2024 · Aug 29, 2024 · Aug 29, 2024
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: parsnip
 Title: A Common API to Modeling and Analysis Functions
-Version: 1.2.1.9002
+Version: 1.2.1.9003
 Authors@R: c(
     person("Max", "Kuhn", , "[email protected]", role = c("aut", "cre")),
     person("Davis", "Vaughan", , "[email protected]", role = "aut"),
@@ -25,7 +25,7 @@ Imports:
     ggplot2,
     globals,
     glue,
-    hardhat (>= 1.4.0),
+    hardhat (>= 1.4.0.9002),
     lifecycle,
     magrittr,
     pillar,
@@ -40,8 +40,8 @@ Imports:
     vctrs (>= 0.6.0),
     withr
 Suggests: 
-    C50,
     bench,
+    C50,
     covr,
     dials (>= 1.1.0),
     earth,
@@ -69,6 +69,9 @@ Suggests:
     xgboost (>= 1.5.0.1)
 VignetteBuilder: 
     knitr
+Remotes: 
+    r-lib/sparsevctrs,
+    tidymodels/hardhat
 ByteCompile: true
 Config/Needs/website: C50, dbarts, earth, glmnet, keras, kernlab, kknn,
     LiblineaR, mgcv, nnet, parsnip, randomForest, ranger, rpart, rstanarm,
@@ -79,6 +82,4 @@ Config/testthat/edition: 3
 Encoding: UTF-8
 LazyData: true
 Roxygen: list(markdown = TRUE)
-Remotes:  
-    r-lib/sparsevctrs
 RoxygenNote: 7.3.2
diff --git a/NAMESPACE b/NAMESPACE
@@ -264,6 +264,7 @@ export(make_classes)
 export(make_engine_list)
 export(make_seealso_list)
 export(mars)
+export(matrix_to_quantile_pred)
 export(max_mtry_formula)
 export(maybe_data_frame)
 export(maybe_matrix)
@@ -402,6 +403,7 @@ importFrom(stats,as.formula)
 importFrom(stats,binomial)
 importFrom(stats,coef)
 importFrom(stats,delete.response)
+importFrom(stats,median)
 importFrom(stats,model.frame)
 importFrom(stats,model.matrix)
 importFrom(stats,model.offset)

diff --git a/NEWS.md b/NEWS.md
@@ -1,25 +1,41 @@
 # parsnip (development version)
 
+## New Features
+
+* A new model mode (`"quantile regression"`) was added, including:
+  * A `"quantreg"` engine for `linear_reg()`.
+  * Predictions are encoded via a custom vector type. See [hardhat::quantile_pred()].
+  * Predicted quantile levels are designated when the new mode is specified. See `?set_mode`.
+
 * `fit_xy()` can now take dgCMatrix input for `x` argument (#1121).
 
 * `fit_xy()` can now take sparse tibbles as data values (#1165).
 
 * `predict()` can now take dgCMatrix and sparse tibble input for `new_data` argument, and error informatively when model doesn't support it (#1167).
 
-* Transitioned package errors and warnings to use cli (#1147 and #1148 by
-  @shum461, #1153 by @RobLBaker and @wright13, #1154 by @JamesHWade, #1160, 
-  #1161, #1081).
+* New `extract_fit_time()` method has been added that returns the time it took to train the model (#853).
+
+## Other Changes
+
+* Transitioned package errors and warnings to use cli (#1147 and #1148 by @shum461, #1153 by @RobLBaker and @wright13, #1154 by @JamesHWade, #1160, #1161, #1081).
 
 * `fit_xy()` currently raises an error for `gen_additive_mod()` model specifications as the default engine (`"mgcv"`) specifies smoothing terms in model formulas. However, some engines specify smooths via additional arguments, in which case the restriction on `fit_xy()` is excessive. parsnip will now only raise an error when fitting a `gen_additive_mod()` with `fit_xy()` when using the `"mgcv"` engine (#775).
 
 * Aligned `null_model()` with other model types; the model type now has an engine argument that defaults to `"parsnip"` and is checked with the same machinery that checks other model types in the package (#1083).
 
-* New `extract_fit_time()` method has been added that returns the time it took to train the model (#853).
+## Bug Fixes
 
 * Ensure that `knit_engine_docs()` has the required packages installed (#1156).
 
 * Fixed bug where some models fit using `fit_xy()` couldn't predict (#1166).
 
+## Breaking Changes
+
+* For quantile prediction, the `predict()` argument has been changed from `quantile` to `quantile_levels` for consistency. This does not affect models with mode `"quantile regression"`. 
+
+* The quantile regression prediction type was disabled for the deprecated `surv_reg()` model. 
+
+
 # parsnip 1.2.1
 
 * Added a missing `tidy()` method for survival analysis glmnet models (#1086).

diff --git a/R/aaa-import-standalone-types-check.R b/R/aaa-import-standalone-types-check.R
@@ -1,7 +1,3 @@
-# Standalone file: do not edit by hand
-# Source: <https://github.com/r-lib/rlang/blob/main/R/standalone-types-check.R>
-# ----------------------------------------------------------------------
-#
 # ---
 # repo: r-lib/rlang
 # file: standalone-types-check.R
@@ -13,6 +9,9 @@
 #
 # ## Changelog
 #
+# 2024-08-15:
+# - `check_character()` gains an `allow_na` argument (@martaalcalde, #1724)
+#
 # 2023-03-13:
 # - Improved error messages of number checkers (@teunbrand)
 # - Added `allow_infinite` argument to `check_number_whole()` (@mgirlich).
@@ -461,15 +460,28 @@ check_formula <- function(x,
 
 # Vectors -----------------------------------------------------------------
 
+# TODO: Figure out what to do with logical `NA` and `allow_na = TRUE`
+
 check_character <- function(x,
                             ...,
+                            allow_na = TRUE,
                             allow_null = FALSE,
                             arg = caller_arg(x),
                             call = caller_env()) {
+
   if (!missing(x)) {
     if (is_character(x)) {
+      if (!allow_na && any(is.na(x))) {
+        abort(
+          sprintf("`%s` can't contain NA values.", arg),
+          arg = arg,
+          call = call
+        )
+      }
+
       return(invisible(NULL))
     }
+
     if (allow_null && is_null(x)) {
       return(invisible(NULL))
     }
@@ -479,7 +491,6 @@ check_character <- function(x,
     x,
     "a character vector",
     ...,
-    allow_na = FALSE,
     allow_null = allow_null,
     arg = arg,
     call = call

diff --git a/R/aaa_models.R b/R/aaa_models.R
@@ -1,6 +1,6 @@
 # Initialize model environments
 
-all_modes <- c("classification", "regression", "censored regression")
+all_modes <- c("classification", "regression", "censored regression", "quantile regression")
 
 # ------------------------------------------------------------------------------
 
@@ -194,9 +194,9 @@ stop_missing_engine <- function(cls, call) {
   )
 }
 
-check_mode_for_new_engine <- function(cls, eng, mode, call = caller_env()) {
-  all_modes <- get_from_env(paste0(cls, "_modes"))
-  if (!(mode %in% all_modes)) {
+check_mode_for_new_engine <- function(cls, eng, mode) {
+  model_modes <- get_from_env(paste0(cls, "_modes"))
+  if (!(mode %in% model_modes)) {
     cli::cli_abort(
       "{.val {mode}} is not a known mode for model {.fn {cls}}.",
       call = call

diff --git a/R/aaa_quantiles.R b/R/aaa_quantiles.R
@@ -0,0 +1,17 @@
+#' Reformat quantile predictions
+#'
+#' @param x A matrix of predictions with rows as samples and columns as quantile
+#' levels.
+#' @param object A parsnip `model_fit` object from a quantile regression model.
+#' @return A one-column tibble; its `.pred_quantile` column holds the result
+#' of [hardhat::quantile_pred()] for the levels in `object$spec`.
+#' @keywords internal
+#' @export
+matrix_to_quantile_pred <- function(x, object) {
+  if (!is.matrix(x)) {
+    x <- as.matrix(x)
+  }
+  rownames(x) <- NULL  # drop row names before building the vector
+  quantile_levels <- object$spec$quantile_levels
+  tibble::new_tibble(x = list(.pred_quantile = hardhat::quantile_pred(x, quantile_levels)))
+}
diff --git a/R/arguments.R b/R/arguments.R
@@ -49,6 +49,8 @@ check_eng_args <- function(args, obj, core_args) {
 #'   set_args(mtry = 3, importance = TRUE) %>%
 #'   set_mode("regression")
 #'
+#' linear_reg() %>%
+#'   set_mode("quantile regression", quantile_levels = c(0.2, 0.5, 0.8))
 #' @export
 set_args <- function(object, ...) {
   UseMethod("set_args")
@@ -89,12 +91,17 @@ set_args.default <- function(object,...) {
 
 #' @rdname set_args
 #' @export
-set_mode <- function(object, mode) {
+set_mode <- function(object, mode, ...) {
   UseMethod("set_mode")
 }
 
+#' @rdname set_args
+#' @param quantile_levels A vector of values between zero and one (only for the
+#' `"quantile regression"` mode); otherwise, it is `NULL`. The model uses these
+#' values to appropriately train quantile regression models to make predictions
+#' for these values (e.g., `quantile_levels = 0.5` is the median).
 #' @export
-set_mode.model_spec <- function(object, mode) {
+set_mode.model_spec <- function(object, mode, quantile_levels = NULL, ...) {
   cls <- class(object)[1]
   if (rlang::is_missing(mode)) {
     spec_modes <- rlang::env_get(get_model_env(), paste0(cls, "_modes"))
@@ -111,11 +118,21 @@ set_mode.model_spec <- function(object, mode) {
 
   object$mode <- mode
   object$user_specified_mode <- TRUE
+  if (mode == "quantile regression") {
+      hardhat::check_quantile_levels(quantile_levels)
+  } else {
+    if (!is.null(quantile_levels)) {
+      cli::cli_abort("{.arg quantile_levels} is only used when the mode is
+                      {.val quantile regression}.")
+    }
+  }
+
+  object$quantile_levels <- quantile_levels
   object
 }
 
 #' @export
-set_mode.default <- function(object, mode) {
+set_mode.default <- function(object, mode, ...) {
   error_set_object(object, func = "set_mode")
 
   invisible(FALSE)

diff --git a/R/fit.R b/R/fit.R
@@ -176,6 +176,10 @@ fit.model_spec <-
     eval_env$formula <- formula
     eval_env$weights <- wts
 
+    if (!is.null(object$quantile_levels)) {
+      eval_env$quantile_levels <- object$quantile_levels
+    }
+
     data <- materialize_sparse_tibble(data, object, "data")
 
     fit_interface <-
@@ -187,7 +191,6 @@ fit.model_spec <-
            with a spark data object."
         )
 
-
     # populate `method` with the details for this model type
     object <- add_methods(object, engine = object$engine)
 
@@ -295,6 +298,10 @@ fit_xy.model_spec <-
     eval_env$y_var <- y_var
     eval_env$weights <- weights_to_numeric(case_weights, object)
 
+    if (!is.null(object$quantile_levels)) {
+      eval_env$quantile_levels <- object$quantile_levels
+    }
+
     # TODO case weights: pass in eval_env not individual elements
     fit_interface <- check_xy_interface(eval_env$x, eval_env$y, cl, object)
 

diff --git a/R/linear_reg_data.R b/R/linear_reg_data.R
@@ -1,6 +1,7 @@
 set_new_model("linear_reg")
 
 set_model_mode("linear_reg", "regression")
+set_model_mode("linear_reg", "quantile regression")
 
 # ------------------------------------------------------------------------------
 
@@ -582,3 +583,67 @@ set_pred(
   )
 )
 
+# ------------------------------------------------------------------------------
+
+set_model_engine("linear_reg", "quantile regression", "quantreg")  # engine for the new mode
+set_dependency("linear_reg", "quantreg", "quantreg")
+
+set_fit(
+  model = "linear_reg",
+  eng = "quantreg",
+  mode = "quantile regression",
+  value = list(
+    interface = "formula",
+    protect = c("formula", "data", "weights"),
+    func = c(pkg = "quantreg", fun = "rq"),  # fit function: quantreg::rq
+    defaults = list(tau = expr(quantile_levels))  # fit()/fit_xy() copy quantile_levels into eval_env; presumably resolved there
+  )
+)
+
+set_encoding(
+  model = "linear_reg",
+  eng = "quantreg",
+  mode = "quantile regression",
+  options = list(
+    predictor_indicators = "traditional",
+    compute_intercept = TRUE,
+    remove_intercept = TRUE,
+    allow_sparse_x = FALSE
+  )
+)
+
+set_pred(
+  model = "linear_reg",
+  eng = "quantreg",
+  mode = "quantile regression",
+  type = "numeric",  # NOTE(review): registered alongside type = "quantile" below; confirm both are intended
+  value = list(
+    pre = NULL,
+    post = NULL,
+    func = c(fun = "predict"),
+    args =
+      list(
+        object = expr(object$fit),
+        newdata = expr(new_data),
+        type = "response",
+        rankdeficient = "simple"
+      )
+  )
+)
+
+set_pred(
+  model = "linear_reg",
+  eng = "quantreg",
+  mode = "quantile regression",
+  type = "quantile",
+  value = list(
+    pre = NULL,
+    post = matrix_to_quantile_pred,  # returns a tibble with a .pred_quantile column
+    func = c(fun = "predict"),
+    args =
+      list(
+        object = expr(object$fit),
+        newdata = expr(new_data)
+      )
+  )
+)
diff --git a/R/parsnip-package.R b/R/parsnip-package.R
@@ -21,7 +21,7 @@
 #' @importFrom stats .checkMFClasses .getXlevels as.formula binomial coef
 #' @importFrom stats delete.response model.frame model.matrix model.offset
 #' @importFrom stats model.response model.weights na.omit na.pass predict qnorm
-#' @importFrom stats qt quantile setNames terms update
+#' @importFrom stats qt quantile setNames terms update median
 #' @importFrom tibble as_tibble is_tibble tibble
 #' @importFrom tidyr gather
 #' @importFrom utils capture.output getFromNamespace globalVariables head

diff --git a/R/predict.R b/R/predict.R
@@ -201,12 +201,14 @@ check_pred_type <- function(object, type, ..., call = rlang::caller_env()) {
         regression = "numeric",
         classification = "class",
         "censored regression" = "time",
+        "quantile regression" = "quantile",
         cli::cli_abort(
-          "{.arg type} should be 'regression', 'censored regression', or 'classification'.",
+          "{.arg type} should be one of {.val {all_modes}}.",
           call = call
         )
       )
   }
+
   if (!(type %in% pred_types))
     cli::cli_abort(
       "{.arg type} should be one of {.or {.arg {pred_types}}}.",
@@ -373,7 +375,7 @@ check_pred_type_dots <- function(object, type, ..., call = rlang::caller_env())
 
   # ----------------------------------------------------------------------------
 
-  other_args <- c("interval", "level", "std_error", "quantile",
+  other_args <- c("interval", "level", "std_error", "quantile_levels",
                   "time", "eval_time", "increasing")
 
   eval_time_types <- c("survival", "hazard")