mlr-org · be-marc · Nov 20, 2024 · Nov 20, 2024 · Nov 21, 2024 · Nov 21, 2024
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -79,6 +79,8 @@ RoxygenNote: 7.3.2
 Collate:
     'mlr_reflections.R'
     'BenchmarkResult.R'
+    'CallbackResample.R'
+    'ContextResample.R'
     'warn_deprecated.R'
     'DataBackend.R'
     'DataBackendCbind.R'
@@ -189,6 +191,7 @@ Collate:
     'helper_print.R'
     'install_pkgs.R'
     'marshal.R'
+    'mlr_callbacks.R'
     'mlr_sugar.R'
     'mlr_test_helpers.R'
     'partition.R'

diff --git a/NAMESPACE b/NAMESPACE
@@ -120,6 +120,8 @@ S3method(unmarshal_model,classif.debug_model_marshaled)
 S3method(unmarshal_model,default)
 S3method(unmarshal_model,learner_state_marshaled)
 export(BenchmarkResult)
+export(CallbackResample)
+export(ContextResample)
 export(DataBackend)
 export(DataBackendDataTable)
 export(DataBackendMatrix)
@@ -207,6 +209,8 @@ export(assert_measure)
 export(assert_measures)
 export(assert_predictable)
 export(assert_prediction)
+export(assert_resample_callback)
+export(assert_resample_callbacks)
 export(assert_resample_result)
 export(assert_resampling)
 export(assert_resamplings)
@@ -218,7 +222,10 @@ export(assert_validate)
 export(auto_convert)
 export(benchmark)
 export(benchmark_grid)
+export(callback_resample)
 export(check_prediction_data)
+export(clbk)
+export(clbks)
 export(col_info)
 export(convert_task)
 export(create_empty_prediction_data)
@@ -236,6 +243,7 @@ export(learner_unmarshal)
 export(lrn)
 export(lrns)
 export(marshal_model)
+export(mlr_callbacks)
 export(mlr_learners)
 export(mlr_measures)
 export(mlr_reflections)
@@ -269,6 +277,9 @@ importFrom(data.table,data.table)
 importFrom(future,nbrOfWorkers)
 importFrom(future,plan)
 importFrom(graphics,plot)
+importFrom(mlr3misc,clbk)
+importFrom(mlr3misc,clbks)
+importFrom(mlr3misc,mlr_callbacks)
 importFrom(parallelly,availableCores)
 importFrom(stats,contr.treatment)
 importFrom(stats,model.frame)

diff --git a/R/BenchmarkResult.R b/R/BenchmarkResult.R
@@ -548,7 +548,9 @@ BenchmarkResult = R6Class("BenchmarkResult",
 as.data.table.BenchmarkResult = function(x, ..., hashes = FALSE, predict_sets = "test", task_characteristics = FALSE) { # nolint
   assert_flag(task_characteristics)
   tab = get_private(x)$.data$as_data_table(view = NULL, predict_sets = predict_sets)
-  tab = tab[, c("uhash", "task", "learner", "resampling", "iteration", "prediction"), with = FALSE]
+  cns = c("uhash", "task", "learner", "resampling", "iteration", "prediction")
+  if ("data_extra" %in% names(tab)) cns = c(cns, "data_extra")
+  tab = tab[, cns, with = FALSE]
 
   if (task_characteristics) {
     set(tab, j = "characteristics", value = map(tab$task, "characteristics"))

diff --git a/R/CallbackResample.R b/R/CallbackResample.R
@@ -0,0 +1,155 @@
+#' @title Resample Callback
+#'
+#' @description
+#' Specialized [mlr3misc::Callback] to customize the behavior of [resample()] and [benchmark()] in mlr3.
+#' The [callback_resample()] function is used to create instances of this class.
+#' Predefined callbacks are stored in the [dictionary][mlr3misc::Dictionary] [mlr_callbacks] and can be retrieved with [clbk()].
+#' For more information on callbacks and their stages, see the [callback_resample()] documentation.
+#'
+#' @export
+CallbackResample = R6Class("CallbackResample",
+  inherit = Callback,
+  public = list(
+
+    #' @field on_resample_begin (`function(callback, context)` | `NULL`)\cr
+    #' Stage called at the beginning of the resampling iteration.
+    #' Called in `workhorse()` (internal). Defaults to `NULL` (no function set for this stage).
+    on_resample_begin = NULL,
+
+    #' @field on_resample_before_train (`function(callback, context)` | `NULL`)\cr
+    #' Stage called before training the learner.
+    #' Called in `workhorse()` (internal). Defaults to `NULL` (no function set for this stage).
+    on_resample_before_train = NULL,
+
+    #' @field on_resample_before_predict (`function(callback, context)` | `NULL`)\cr
+    #' Stage called before predicting.
+    #' Called in `workhorse()` (internal). Defaults to `NULL` (no function set for this stage).
+    on_resample_before_predict = NULL,
+
+    #' @field on_resample_end (`function(callback, context)` | `NULL`)\cr
+    #' Stage called at the end of the resampling iteration.
+    #' Called in `workhorse()` (internal). Defaults to `NULL` (no function set for this stage).
+    on_resample_end = NULL
+  )
+)
+
+#' @title Create Resample Callback
+#'
+#' @description
+#' Function to create a [CallbackResample].
+#' Predefined callbacks are stored in the [dictionary][mlr3misc::Dictionary] [mlr_callbacks] and can be retrieved with [clbk()].
+#'
+#' Resample callbacks are called at different stages of the resampling process.
+#' Each stage is called once per resampling iteration.
+#' The stages are prefixed with `on_resample_*`.
+#' The text in brackets indicates what happens between the stages and which accesses to the [ContextResample] (`ctx`) are typical for the stage.
+#'
+#' ```
+#' Start Resampling Iteration on Worker
+#'  - on_resample_begin
+#'    (Split `ctx$task` into training and test set with `ctx$resampling` and `ctx$iteration`)
+#'  - on_resample_before_train
+#'    (Train the learner `ctx$learner` on training data)
+#'  - on_resample_before_predict
+#'    (Predict on predict sets and store prediction data `ctx$pdatas`)
+#'  - on_resample_end
+#'    (Erase model `ctx$learner$model` if requested and return results)
+#' End Resampling Iteration on Worker
+#' ```
+#'
+#' The callback can store data in `ctx$learner$state` or `ctx$data_extra`.
+#' The data in `ctx$data_extra` is stored in the [ResampleResult] or [BenchmarkResult].
+#' See also the section on parameters for more information on the stages.
+#'
+#' @details
+#' When implementing a callback, each function must have two arguments named `callback` and `context`.
+#' A callback can write data to the state (`$state`), e.g. settings that affect the callback itself.
+#' We highly discourage changing the task, learner and resampling objects via the callback.
+#'
+#' @param id (`character(1)`)\cr
+#'   Identifier for the new instance.
+#' @param label (`character(1)`)\cr
+#'   Label for the new instance.
+#' @param man (`character(1)`)\cr
+#'   String in the format `[pkg]::[topic]` pointing to a manual page for this object.
+#'   The referenced help page can be opened via method `$help()`.
+#' @param on_resample_begin (`function()`)\cr
+#'   Stage called at the beginning of the resampling iteration.
+#'   Called in `workhorse()` (internal).
+#' @param on_resample_before_train (`function()`)\cr
+#'   Stage called before training the learner.
+#'   Called in `workhorse()` (internal).
+#' @param on_resample_before_predict (`function()`)\cr
+#'   Stage called before predicting.
+#'   Called in `workhorse()` (internal).
+#' @param on_resample_end (`function()`)\cr
+#'   Stage called at the end of the resampling iteration.
+#'   Called in `workhorse()` (internal).
+#'
+#' @export
+#' @examples
+#' task = tsk("pima")
+#' learner = lrn("classif.rpart")
+#' resampling = rsmp("cv", folds = 3)
+#'
+#' # save selected features callback
+#' callback = callback_resample("selected_features",
+#'   on_resample_end = function(callback, context) {
+#'     context$learner$state$selected_features = context$learner$selected_features()
+#'   }
+#' )
+#'
+#' rr = resample(task, learner, resampling, callbacks = callback)
+#'
+#' rr$learners[[1]]$state$selected_features
+callback_resample = function(
+  id,
+  label = NA_character_,
+  man = NA_character_,
+  on_resample_begin = NULL,
+  on_resample_before_train = NULL,
+  on_resample_before_predict = NULL,
+  on_resample_end = NULL
+  ) {
+  stages = discard(set_names(list(
+    on_resample_begin,
+    on_resample_before_train,
+    on_resample_before_predict,
+    on_resample_end),
+    c(
+      "on_resample_begin",
+      "on_resample_before_train",
+      "on_resample_before_predict",
+      "on_resample_end"
+    )), is.null) # keep only the stages that were actually supplied
+
+  stages = map(stages, function(stage) crate(assert_function(stage, args = c("callback", "context")))) # every stage must declare `callback` and `context`
+  callback = CallbackResample$new(id, label, man)
+  iwalk(stages, function(stage, name) callback[[name]] = stage) # attach each stage function to the field of the same name
+  callback
+}
+
+#' @title Assertions for Callbacks
+#'
+#' @description
+#' Assertions for the [CallbackResample] class.
+#'
+#' @param callback ([CallbackResample]).
+#' @param null_ok (`logical(1)`)\cr
+#'   If `TRUE`, `NULL` is allowed.
+#'
+#' @return [CallbackResample] | List of [CallbackResample]s.
+#' @export
+assert_resample_callback = function(callback, null_ok = FALSE) {
+  assert_class(callback, "CallbackResample", null.ok = null_ok)
+  invisible(callback)
+}
+
+#' @export
+#' @param callbacks (list of [CallbackResample])\cr List of callbacks; each element is checked with `assert_resample_callback()`.
+#' @rdname assert_resample_callback
+assert_resample_callbacks = function(callbacks, null_ok = FALSE) {
+  assert_list(callbacks, null.ok = null_ok)
+  if (null_ok && is.null(callbacks)) return(invisible(NULL)) # nothing to check element-wise
+  invisible(lapply(callbacks, assert_resample_callback)) # elements are checked with the default `null_ok = FALSE`
+}
diff --git a/R/ContextResample.R b/R/ContextResample.R
@@ -0,0 +1,103 @@
+#' @title Resample Context
+#'
+#' @description
+#' A [CallbackResample] accesses and modifies data during [resample()] and [benchmark()] via the `ContextResample`.
+#' See the section on fields for a list of modifiable objects.
+#' See [callback_resample()] for a list of stages that access `ContextResample`.
+#'
+#' @export
+ContextResample = R6Class("ContextResample",
+  inherit = Context,
+  public = list(
+
+    #' @description
+    #' Creates a new instance of this [R6][R6::R6Class] class.
+    #'
+    #' @param task ([Task])\cr
+    #'   The task to be evaluated.
+    #' @param learner ([Learner])\cr
+    #'   The learner to be evaluated.
+    #' @param resampling ([Resampling])\cr
+    #'   The resampling strategy to be used.
+    #' @param iteration (`integer()`)\cr
+    #'   The current iteration.
+    initialize = function(task, learner, resampling, iteration) {
+      # arguments are stored as passed; no assertions to avoid overhead
+      private$.task = task
+      private$.learner = learner
+      private$.resampling = resampling
+      private$.iteration = iteration
+
+      super$initialize(id = "evaluate", label = "Evaluation") # NOTE(review): id/label say "evaluate"/"Evaluation" although the class is named ContextResample - confirm this is intended
+    }
+  ),
+
+  active = list(
+
+    #' @field task ([Task])\cr
+    #' The task to be evaluated.
+    #' The task is unchanged during the evaluation.
+    #' The task is read-only.
+    task = function(rhs) {
+      assert_ro_binding(rhs)
+      private$.task
+    },
+
+    #' @field learner ([Learner])\cr
+    #' The learner to be evaluated.
+    #' The learner contains the models after stage `on_resample_before_train`.
+    learner = function(rhs) {
+      if (missing(rhs)) {
+        return(private$.learner)
+      }
+      private$.learner = assert_learner(rhs) # writable field: the assigned value is checked with assert_learner()
+    },
+
+    #' @field resampling ([Resampling])\cr
+    #' The resampling strategy to be used.
+    #' The resampling is unchanged during the evaluation.
+    #' The resampling is read-only.
+    resampling = function(rhs) {
+      assert_ro_binding(rhs)
+      private$.resampling
+    },
+
+    #' @field iteration (`integer()`)\cr
+    #' The current iteration.
+    #' The iteration is read-only.
+    iteration = function(rhs) {
+      assert_ro_binding(rhs)
+      private$.iteration
+    },
+
+    #' @field pdatas (List of [PredictionData])\cr
+    #' The prediction data.
+    #' The data is available on stage `on_resample_end`.
+    pdatas = function(rhs) {
+      if (missing(rhs)) {
+        return(private$.pdatas)
+      }
+      private$.pdatas = assert_list(rhs, "PredictionData") # writable field: must be a list of PredictionData objects
+    },
+
+    #' @field data_extra (`list()`)\cr
+    #' Data saved in the [ResampleResult] or [BenchmarkResult].
+    #' Use this field to save results.
+    #' Must be a `list()`.
+    data_extra = function(rhs) {
+      if (missing(rhs)) {
+        return(private$.data_extra)
+      }
+      private$.data_extra = assert_list(rhs) # writable field: any list is accepted
+    }
+  ),
+
+  private = list(
+    .task = NULL,
+    .learner = NULL,
+    .resampling = NULL,
+    .iteration = NULL,
+    .pdatas = NULL,
+    .data_extra = NULL
+  )
+)
diff --git a/R/ResampleResult.R b/R/ResampleResult.R
@@ -335,6 +335,13 @@ ResampleResult = R6Class("ResampleResult",
       private$.data$learners(private$.view)$learner
     },
 
+    #' @field data_extra (`list()`)\cr
+    #' Additional data stored in the [ResampleResult]. This field is read-only.
+    data_extra = function(rhs) {
+      assert_ro_binding(rhs)
+      private$.data$data_extra(private$.view)
+    },
+
     #' @field warnings ([data.table::data.table()])\cr
     #' A table with all warning messages.
     #' Column names are `"iteration"` and `"msg"`.
@@ -373,7 +380,9 @@ ResampleResult = R6Class("ResampleResult",
 as.data.table.ResampleResult = function(x, ..., predict_sets = "test") { # nolint
   private = get_private(x)
   tab = private$.data$as_data_table(view = private$.view, predict_sets = predict_sets)
-  tab[, c("task", "learner", "resampling", "iteration", "prediction"), with = FALSE]
+  cns = c("task", "learner", "resampling", "iteration", "prediction")
+  if ("data_extra" %in% names(tab)) cns = c(cns, "data_extra")
+  tab[, cns, with = FALSE]
 }
 
 # #' @export