tidymodels · topepo · Jul 18, 2024 · Jul 18, 2024 · Jul 18, 2024
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -77,6 +77,6 @@ Config/testthat/edition: 3
 Encoding: UTF-8
 LazyData: true
 Roxygen: list(markdown = TRUE)
-Remotes:  
+Remotes: 
     tidymodels/hardhat
-RoxygenNote: 7.3.1
+RoxygenNote: 7.3.2
diff --git a/NEWS.md b/NEWS.md
@@ -7,6 +7,7 @@
 
 * New `extract_fit_time()` method has been added that returns the time it took to train the model (#853).
 
+* Added `"quantile"` prediction methods for engines using `lm`, `stan`, and `dbarts`.
 
 # parsnip 1.2.1
 

diff --git a/R/augment.R b/R/augment.R
@@ -78,29 +78,38 @@
 #' augment(cls_xy, cls_tst)
 #' augment(cls_xy, cls_tst[, -3])
 #'
-augment.model_fit <- function(x, new_data, eval_time = NULL, ...) {
+augment.model_fit <- function(x, new_data, eval_time = NULL, quantile = NULL, ...) {
+  # Dispatch on the model mode. `quantile` is forwarded only to the regression
+  # helper and `eval_time` only to the censored regression helper; any other
+  # mode aborts with an "Unknown mode" error.
   new_data <- tibble::new_tibble(new_data)
   res <-
     switch(
       x$spec$mode,
-      "regression"          = augment_regression(x, new_data),
+      "regression"          = augment_regression(x, new_data, quantile = quantile),
       "classification"      = augment_classification(x, new_data),
       "censored regression" = augment_censored(x, new_data, eval_time = eval_time),
       rlang::abort(paste("Unknown mode:", x$spec$mode))
     )
   tibble::new_tibble(res)
 }
 
-augment_regression <- function(x, new_data) {
+# Augment `new_data` with regression predictions from the fitted model `x`.
+#
+# Always prepends the numeric prediction. When `quantile` is non-NULL and the
+# model spec has a "quantile" prediction type, the quantile predictions for
+# those levels are prepended as well; otherwise `quantile` is ignored. A
+# `.resid` column is added when the outcome column is present in `new_data`.
+# Columns starting with `.pred` and `.resid` are moved to the front.
+augment_regression <- function(x, new_data, quantile = NULL) {
   ret <- new_data
   check_spec_pred_type(x, "numeric")
+
+  # Scalar condition: use short-circuit `&&` (not elementwise `&`) so the
+  # spec lookup is skipped entirely when no quantiles were requested.
+  if (!is.null(quantile) && spec_has_pred_type(x, "quantile")) {
+    ret <-
+      dplyr::bind_cols(
+        predict(x, new_data = new_data, type = "quantile", quantile = quantile),
+        ret)
+  }
+
   ret <- dplyr::bind_cols(predict(x, new_data = new_data), ret)
   if (length(x$preproc$y_var) > 0) {
     y_nm <- x$preproc$y_var
     if (any(names(new_data) == y_nm)) {
       ret <- dplyr::mutate(ret, .resid = !!rlang::sym(y_nm) - .pred)
     }
   }
+
   dplyr::relocate(ret, dplyr::starts_with(".pred"), dplyr::starts_with(".resid"))
 }
 

diff --git a/R/linear_reg_data.R b/R/linear_reg_data.R
@@ -73,6 +73,7 @@ set_pred(
       )
   )
 )
+
 set_pred(
   model = "linear_reg",
   eng = "lm",
@@ -97,6 +98,24 @@ set_pred(
   )
 )
 
+# Register the "quantile" prediction type for `linear_reg()` with the "lm"
+# engine. Predictions are produced by `lm_quantile()` on the underlying fit
+# (`object$fit`). The `quantile` entry below is a placeholder expression:
+# `predict_quantile.model_fit()` overwrites `args$quantile` with the
+# requested values before the prediction call is built.
+set_pred(
+  model = "linear_reg",
+  eng = "lm",
+  mode = "regression",
+  type = "quantile",
+  value = list(
+    pre = NULL,
+    post = NULL,
+    func = c(fun = "lm_quantile"),
+    args =
+      list(
+        object = expr(object$fit),
+        new_data = expr(new_data),
+        quantile = expr(quantile)
+      )
+  )
+)
+
 set_pred(
   model = "linear_reg",
   eng = "lm",
@@ -582,3 +601,67 @@ set_pred(
   )
 )
 
+# ------------------------------------------------------------------------------
+# Helper functions
+
+# Quantile predictions for an `lm` fit.
+#
+# `object` is the fitted `lm` object, `new_data` the data to predict on, and
+# `quantile` the quantile levels to compute (deduplicated and sorted first).
+# Returns the result of `quant_to_list()`: a one-column tibble whose
+# `.pred_quantile` list-column holds, for each row of `new_data`, a tibble
+# with columns `.quantile` and `.pred_quantile`.
+lm_quantile <- function(object, new_data, quantile = (1:9)/10) {
+  quantile <- sort(unique(quantile))
+
+  # seq_len() so that zero-row `new_data` gives an empty index, not c(1, 0)
+  .row <- seq_len(nrow(new_data))
+
+  # predict.lm() only produces two-sided intervals: `level = 0.95` gives the
+  # bounds at c(0.025, 0.975). To obtain a single quantile q, request the
+  # symmetric interval that has q as one of its bounds:
+  #   q < 0.5: level = 1 - 2 * q, take "lwr"
+  #   q > 0.5: level = 2 * q - 1, take "upr"
+  # Both levels equal abs(1 - 2 * q). For example, the 0.05 and 0.95
+  # quantiles are the two bounds of the level = 0.90 interval.
+  preds <- vector("list", length(quantile))
+  for (i in seq_along(quantile)) {
+    q <- quantile[i]
+    if (q == 0.5) {
+      # The 0.5 quantile is taken to be the point prediction
+      q_pred <- predict(object, new_data)
+    } else {
+      bounds <-
+        predict(
+          object,
+          new_data,
+          interval = "prediction",
+          level = abs(1 - 2 * q)
+        )
+      q_pred <- bounds[, if (q > 0.5) "upr" else "lwr"]
+    }
+    preds[[i]] <-
+      tibble::tibble(.quantile = q, .pred_quantile = q_pred, .row = .row)
+  }
+
+  # Now convert to list columns
+  quant_to_list(dplyr::bind_rows(preds))
+}
+
+# Collapse long-format quantile predictions (one row per `.row`/`.quantile`
+# pair) into a one-column tibble: the `.pred_quantile` list-column has one
+# element per distinct `.row`, holding every column of `x` except `.row`,
+# ordered by `.quantile`.
+quant_to_list <- function(x) {
+  row_order <- order(x$.row, x$.quantile)
+  sorted <- x[row_order, ]
+  keep_cols <- setdiff(colnames(sorted), ".row")
+  per_row <- vctrs::vec_split(x = sorted[keep_cols], by = sorted$.row)
+  tibble::new_tibble(list(.pred_quantile = per_row$val))
+}
+
+
diff --git a/R/predict.R b/R/predict.R
@@ -86,9 +86,9 @@
 #' produces for class probabilities (or other non-scalar outputs),
 #' the columns are named `.pred_lower_classlevel` and so on.
 #'
-#' For `type = "quantile"`, the tibble has a `.pred` column, which is
+#' For `type = "quantile"`, the tibble has a `.pred_quantile` column, which is
 #'  a list-column. Each list element contains a tibble with columns
-#'  `.pred` and `.quantile` (and perhaps other columns).
+#'  `.pred_quantile` and `.quantile`.
 #'
 #' For `type = "time"`, the tibble has a `.pred_time` column.
 #'

diff --git a/R/predict_quantile.R b/R/predict_quantile.R
@@ -27,7 +27,7 @@ predict_quantile.model_fit <- function(object,
       new_data <- object$spec$method$pred$quantile$pre(new_data, object)
 
     # Pass some extra arguments to be used in post-processor
-    object$spec$method$pred$quantile$args$p <- quantile
+    object$spec$method$pred$quantile$args$quantile <- quantile
     pred_call <- make_pred_call(object$spec$method$pred$quantile)
 
     res <- eval_tidy(pred_call)

diff --git a/man/augment.Rd b/man/augment.Rd
diff --git a/man/predict.model_fit.Rd b/man/predict.model_fit.Rd
Original file line number	Diff line number	Diff line change
Expand Up		@@ -7,6 +7,7 @@

		* New `extract_fit_time()` method has been added that returns the time it took to train the model (#853).

		* Added `"quantile"` prediction methods for engines using `lm`, `stan`, and `dbarts`.

		# parsnip 1.2.1

Expand Down