From b8c1a67132edeb626a07f714cba5b67235ca1d3a Mon Sep 17 00:00:00 2001 From: Michael Mayer Date: Fri, 5 Jul 2024 18:47:01 +0200 Subject: [PATCH 1/6] Remove possibility to use factor-valued predictions --- DESCRIPTION | 2 +- NEWS.md | 7 +++ R/hstats.R | 2 +- R/ice.R | 1 - R/pd_raw.R | 5 +- R/utils_input.R | 20 +++--- packaging.R | 2 +- tests/testthat/test_calculate.R | 100 ------------------------------ tests/testthat/test_hstats.R | 6 -- tests/testthat/test_ice.R | 9 --- tests/testthat/test_input.R | 5 -- tests/testthat/test_partial_dep.R | 8 --- 12 files changed, 23 insertions(+), 144 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index f161602c..b20a6113 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: hstats Title: Interaction Statistics -Version: 1.1.2 +Version: 1.2.0 Authors@R: person("Michael", "Mayer", , "mayermichael79@gmail.com", role = c("aut", "cre")) Description: Fast, model-agnostic implementation of different H-statistics diff --git a/NEWS.md b/NEWS.md index 4cf55340..9009049c 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,10 @@ +# hstats 1.2.0 + +# Major changes + +- Factor-valued predictions are no longer possible. +- Consequently, also removed "classification_error" loss. + # hstats 1.1.2 ## ICE plots diff --git a/R/hstats.R b/R/hstats.R index 3a4b5fcd..5cbb7562 100644 --- a/R/hstats.R +++ b/R/hstats.R @@ -188,7 +188,7 @@ hstats.default <- function(object, X, v = NULL, } # Predictions ("F" in Friedman and Popescu) always calculated (cheap) - f <- wcenter(prepare_pred(pred_fun(object, X, ...), ohe = TRUE), w = w) + f <- wcenter(prepare_pred(pred_fun(object, X, ...)), w = w) mean_f2 <- wcolMeans(f^2, w = w) # A vector # Initialize first progress bar diff --git a/R/ice.R b/R/ice.R index 12df435a..8b36dbac 100644 --- a/R/ice.R +++ b/R/ice.R @@ -117,7 +117,6 @@ ice.default <- function(object, v, X, pred_fun = stats::predict, grid = grid, pred_fun = pred_fun, pred_only = FALSE, - ohe = TRUE, ... ) pred <- ice_out[["pred"]] diff --git a/R/pd_raw.R b/R/pd_raw.R index 3bff4d5c..2549e878 100644 --- a/R/pd_raw.R +++ b/R/pd_raw.R @@ -54,11 +54,10 @@ pd_raw <- function(object, v, X, grid, pred_fun = stats::predict, #' predictions. Otherwise, a list with two elements: `pred` (predictions) #' and `grid_pred` (the corresponding grid values in the same mode as the input, #' but replicated over `X`). -#' @param ohe Should factor output be one-hot encoded? Default is `FALSE`. #' @returns #' Either a vector/matrix of predictions or a list with predictions and grid. ice_raw <- function(object, v, X, grid, pred_fun = stats::predict, - pred_only = TRUE, ohe = FALSE, ...) { + pred_only = TRUE, ...) { D1 <- length(v) == 1L n <- nrow(X) n_grid <- NROW(grid) @@ -79,7 +78,7 @@ ice_raw <- function(object, v, X, grid, pred_fun = stats::predict, } # Calculate matrix/vector of predictions - pred <- prepare_pred(pred_fun(object, X_pred, ...), ohe = ohe) + pred <- prepare_pred(pred_fun(object, X_pred, ...)) if (pred_only) { return(pred) diff --git a/R/utils_input.R b/R/utils_input.R index 8f2251bf..1f5505aa 100644 --- a/R/utils_input.R +++ b/R/utils_input.R @@ -1,21 +1,23 @@ #' Prepares Predictions #' -#' Converts predictions to vector, matrix or factor. +#' Converts predictions to vector or matrix. #' #' @noRd #' @keywords internal #' #' @param x Object representing model predictions. -#' @param ohe If `x` is a factor: should it be one-hot encoded? Default is `FALSE`. -#' @returns Like `x`, but converted to matrix, vector, or factor. 
-prepare_pred <- function(x, ohe = FALSE) { +#' @returns Like `x`, but converted to matrix or vector. +prepare_pred <- function(x) { if (is.data.frame(x) && ncol(x) == 1L) { x <- x[[1L]] } - if (ohe && is.factor(x)) { - return(fdummy(x)) + if (!is.vector(x) || !is.matrix(x)) { + x <- as.matrix(x) } - if (is.vector(x) || is.matrix(x) || is.factor(x)) x else as.matrix(x) + if (!is.numeric(x)) { + stop("Predictions must be numeric!") + } + return(x) } #' Prepares Group BY Variable @@ -99,7 +101,7 @@ prepare_w <- function(w, X) { #' #' @returns A list with "y" (vector, matrix, or factor) and "y_names" (if `y` #' was passed as column names). -prepare_y <- function(y, X, ohe = FALSE) { +prepare_y <- function(y, X) { if (NROW(y) < nrow(X) && all(y %in% colnames(X))) { y_names <- y if (is.data.frame(X) && length(y) == 1L) { @@ -111,6 +113,6 @@ prepare_y <- function(y, X, ohe = FALSE) { stopifnot(NROW(y) == nrow(X)) y_names <- NULL } - list(y = prepare_pred(y, ohe = ohe), y_names = y_names) + list(y = prepare_pred(y), y_names = y_names) } diff --git a/packaging.R b/packaging.R index 69c040b2..5359f72b 100644 --- a/packaging.R +++ b/packaging.R @@ -15,7 +15,7 @@ library(usethis) use_description( fields = list( Title = "Interaction Statistics", - Version = "1.1.2", + Version = "1.2.0", Description = "Fast, model-agnostic implementation of different H-statistics introduced by Jerome H. Friedman and Bogdan E. Popescu (2008) . These statistics quantify interaction strength per feature, feature pair, diff --git a/tests/testthat/test_calculate.R b/tests/testthat/test_calculate.R index b3278ee1..d78618d2 100644 --- a/tests/testthat/test_calculate.R +++ b/tests/testthat/test_calculate.R @@ -4,7 +4,6 @@ test_that("rep_each() works", { expect_true(is.integer(rep_each(100, 100))) }) -# The next two checks copied from {kernelshap} test_that("rep_rows() gives the same as usual subsetting (except rownames)", { setrn <- function(x) {rownames(x) <- 1:nrow(x); x} @@ -45,38 +44,6 @@ test_that("fdummy() respects factor level order", { expect_equal(colnames(d1), rev(colnames(d2))) }) -test_that("colMeans_factor() works", { - x <- c("A", "A", "C", "D") - expect_equal(colMeans_factor(x), colMeans(fdummy(x))) - - expect_equal(colMeans_factor(c("A", "B", "B")), c(A = 1/3, B = 2/3)) - expect_equal( - colMeans_factor(factor(c("A", "B", "B"), levels = c("B", "A", "C"))), - c(B = 2/3, A = 1/3, C = 0) - ) - expect_equal( - colMeans_factor(iris$Species), c(setosa = 1/3, versicolor = 1/3, virginica = 1/3) - ) - expect_equal( - colMeans_factor(iris$Species[101:150]), c(setosa = 0, versicolor = 0, virginica = 1) - ) -}) - -test_that("colMeans_factor() works for singletons", { - x <- c("A") - expect_equal(colMeans_factor(x), colMeans(fdummy(x))) - expect_true(is.vector(colMeans_factor(x))) -}) - -test_that("colMeans_factor() respects factor level order", { - x1 <- factor(c("A", "A", "C", "D")) - x2 <- factor(x1, levels = rev(levels(x1))) - d1 <- colMeans_factor(x1) - d2 <- colMeans_factor(x2) - expect_equal(d1, d2[names(d1)]) - expect_equal(names(d1), rev(names(d2))) -}) - test_that("wcolMeans() works", { x <- cbind(a = 1:6, b = 6:1) x_df <- as.data.frame(x) @@ -100,9 +67,6 @@ test_that("wrowmean_vector() works for vectors and 1D matrices", { out1 <- wrowmean_vector(x1, ngroups = 2L) out2 <- wrowmean_vector(x2, ngroups = 2L) - expect_error(wrowmean_vector(factor(1, 2))) - expect_error(wrowmean_vector(matrix(1:4, ncol = 2L))) - expect_true(is.matrix(out1)) expect_equal(out1, unname(out2)) expect_equal(out2, cbind(a = c(5, 2))) @@ 
-128,32 +92,6 @@ test_that("wrowmean_vector() works for vectors and 1D matrices", { expect_equal(out2, wrowmean(x2, ngroups = 2L, w = 1:3)) }) -test_that("rowmean_factor() works for factor input", { - x <- factor(c("C", "A", "C", "C", "A", "A")) - out <- rowmean_factor(x, ngroups = 2L) - - expect_true(is.matrix(out)) - expect_equal(out, cbind(A = c(1/3, 2/3), C = c(2/3, 1/3))) - expect_equal(out, wrowmean_matrix(fdummy(x), ngroups = 2L)) - expect_equal(out, wrowmean(x, ngroups = 2L)) -}) - -test_that("rowmean() works for factor input with weights", { - x <- factor(c("C", "A", "C", "C", "A", "A")) - out <- wrowmean(x, ngroups = 2L) - - # Constant weights have no effect - expect_equal(wrowmean(x, ngroups = 2L, w = c(1, 1, 1)), out) - expect_equal(wrowmean(x, ngroups = 2L, w = c(4, 4, 4)), out) - - # Non-constant weights - A1 <- weighted.mean(x[1:3] == "A", 1:3) - A2 <- weighted.mean(x[4:6] == "A", 1:3) - C1 <- weighted.mean(x[1:3] == "C", 1:3) - C2 <- weighted.mean(x[4:6] == "C", 1:3) - expect_equal(wrowmean(x, ngroups = 2L, w = 1:3), cbind(A = c(A1, A2), C = c(C1, C2))) -}) - test_that("wrowmean_matrix() works for matrices with > 1 columns", { x <- cbind(x = 6:1, z = 1:6) out <- wrowmean_matrix(x, ngroups = 2L) @@ -181,10 +119,6 @@ test_that("wrowmean_matrix() works for matrices with > 1 columns", { expect_equal(out, wrowmean(x, ngroups = 2L, w = 1:3)) }) -test_that("wrowmean() checks for correct weight lengths", { - expect_error(wrowmean(1:10, ngroups = 2L, w = 1:10)) -}) - test_that("wrowmean() equals wColMeans() for ngroups = 1", { x <- 1:10 expect_equal(wrowmean(x), rbind(wcolMeans(x))) @@ -197,10 +131,6 @@ test_that("wrowmean() equals wColMeans() for ngroups = 1", { X <- cbind(a = 1:10, b = 10:1) expect_equal(wrowmean(X), rbind(wcolMeans(X))) expect_equal(wrowmean(X, w = 1:10), rbind(wcolMeans(X, w = 1:10))) - - x <- factor(c("B", "B", "B", "C")) - expect_equal(wrowmean(x), rbind(wcolMeans(x))) - expect_equal(wrowmean(x, w = 1:4), rbind(wcolMeans(x, w = 1:4))) }) test_that("gwcolMeans() works", { @@ -233,36 +163,6 @@ test_that("gwcolMeans() works", { expect_equal(r$w, c(sum(3:6), sum(1:2))) }) -test_that("gwcolMeans() works for factors", { - x <- factor(c("A", "B", "A", "B", "A", "A"), levels = c("B", "A")) - g <- c(2, 2, 1, 1, 1, 1) - w1 <- rep(2, times = 6) - w2 <- 1:6 - - # Ungrouped - r <- gwColMeans(x) - expect_equal(r$M, rbind(wcolMeans(x))) - expect_equal(r$w, length(x)) - - r <- gwColMeans(x, w = w1) - expect_equal(r$M, rbind(wcolMeans(x, w = w1))) - expect_equal(r$w, sum(w1)) - - r <- gwColMeans(x, w = w2) - expect_equal(r$M, rbind(wcolMeans(x, w = w2))) - expect_equal(r$w, sum(w2)) - - # Grouped - r <- gwColMeans(x, g = g) - expect_equal(r$M[2L, ], wcolMeans(x[g == 2])) - expect_equal(r$w, c(4, 2)) - - # Grouped and weighted - r <- gwColMeans(x, g = g, w = w2) - expect_equal(r$M[2L, ], wcolMeans(x[g == 2], w = w2[g == 2])) - expect_equal(r$w, c(sum(3:6), sum(1:2))) -}) - test_that("wcenter() works for matrices with > 1 columns", { x <- cbind(a = 1:6, b = 6:1) expect_equal(wcenter(x), cbind(a = 1:6 - mean(1:6), b = 6:1 - mean(6:1))) diff --git a/tests/testthat/test_hstats.R b/tests/testthat/test_hstats.R index 5df71942..e9d6e7eb 100644 --- a/tests/testthat/test_hstats.R +++ b/tests/testthat/test_hstats.R @@ -356,12 +356,6 @@ test_that("hstats() matches {iml} 0.11.1 in a specific case", { ) }) -test_that("hstats() works for factor predictions", { - pf <- function(m, X) factor(X[, "v1"], levels = 0:1, labels = c("zero", "one")) - out <- hstats(1, X = cbind(v1 = 0:1, v2 = 0), 
pred_fun = pf, verbose = FALSE) - expect_equal(out$h2$num, cbind(zero = 0, one = 0)) -}) - test_that("hstats() gives all zero statistics if prediction is constant", { pf <- function(m, X) numeric(nrow(X)) out <- hstats(1, X = iris[1:4], pred_fun = pf, verbose = FALSE) diff --git a/tests/testthat/test_ice.R b/tests/testthat/test_ice.R index 449d2eaa..8111e2a0 100644 --- a/tests/testthat/test_ice.R +++ b/tests/testthat/test_ice.R @@ -236,12 +236,3 @@ test_that("ice() works with missing value in BY", { expect_true(anyNA(r$data$x3)) expect_s3_class(plot(r), "ggplot") }) - -test_that("ice() works for factor predictions", { - pf <- function(m, X) factor(X[, "v1"], levels = 0:1, labels = c("zero", "one")) - out <- ice(1, v = "v1", X = cbind(v1 = 0:1), pred_fun = pf) - out <- out$data[out$data$obs_ == 1L, c("zero", "one")] - out <- as.matrix(out) - row.names(out) <- NULL - expect_equal(out, cbind(zero = 1:0, one = 0:1)) -}) diff --git a/tests/testthat/test_input.R b/tests/testthat/test_input.R index 5d700a93..274e4b55 100644 --- a/tests/testthat/test_input.R +++ b/tests/testthat/test_input.R @@ -3,8 +3,6 @@ test_that("prepare_pred() works", { expect_equal(prepare_pred(iris["Species"]), iris$Species) expect_equal(prepare_pred(iris$Sepal.Width), iris$Sepal.Width) expect_equal(prepare_pred(iris["Sepal.Width"]), iris$Sepal.Width) - expect_equal(prepare_pred(iris$Species, ohe = TRUE), fdummy(iris$Species)) - expect_equal(prepare_pred(1:3, ohe = TRUE), 1:3) }) test_that("prepare_w() works", { @@ -43,7 +41,4 @@ test_that("prepare_y() works", { out <- prepare_y("Sepal.Width", X = iris) expect_equal(out$y, iris$Sepal.Width) expect_equal(out$y_names, "Sepal.Width") - - # OHE - expect_equal(prepare_y(iris$Species, X = iris, ohe = TRUE)$y, fdummy(iris$Species)) }) diff --git a/tests/testthat/test_partial_dep.R b/tests/testthat/test_partial_dep.R index a23a3d3c..78147833 100644 --- a/tests/testthat/test_partial_dep.R +++ b/tests/testthat/test_partial_dep.R @@ -532,11 +532,3 @@ test_that(".compress_grid() leaves grid unchanged if unique", { expect_equal(out$grid, g) expect_equal(out$reindex, NULL) }) - -test_that("partial_dep() works for factor predictions", { - pf <- function(m, X) factor(X[, "v1"], levels = 0:1, labels = c("zero", "one")) - out <- partial_dep(1, v = "v1", X = cbind(v1 = 0:1), pred_fun = pf) - out <- as.matrix(out$data[c("zero", "one")]) - row.names(out) <- NULL - expect_equal(out, cbind(zero = 1:0, one = 0:1)) -}) From fbcfb8af220833d7f7754e016b2a61f381c5975c Mon Sep 17 00:00:00 2001 From: Michael Mayer Date: Fri, 5 Jul 2024 18:55:15 +0200 Subject: [PATCH 2/6] Simplified computes --- R/utils_calculate.R | 108 ++++---------------------------------------- R/utils_input.R | 1 - 2 files changed, 9 insertions(+), 100 deletions(-) diff --git a/R/utils_calculate.R b/R/utils_calculate.R index 49c50f16..a1f5620c 100644 --- a/R/utils_calculate.R +++ b/R/utils_calculate.R @@ -16,14 +16,6 @@ rep_each <- function(m, each) { dim(out) <- NULL out } -# -# # Same as rep.int(seq_len(m), times) -# rep_times <- function(m, times) { -# out <- .row(dim = c(m, times)) -# dim(out) <- NULL -# out -# } - #' Fast OHE #' @@ -54,49 +46,14 @@ fdummy <- function(x) { #' @noRd #' @keywords internal #' -#' @param x A vector, factor, or matrix-like. +#' @param x A vector or matrix-like. #' @param w Optional case weights. #' @returns A vector of column means. 
wcolMeans <- function(x, w = NULL) { - if (is.factor(x)) { - if (is.null(w)) { - return(colMeans_factor(x)) - } - x <- fdummy(x) - } else if (!is.matrix(x)) { + if (!is.matrix(x)) { x <- as.matrix(x) } - if (is.null(w)) { - return(colMeans(x)) - } - if (length(w) != nrow(x)) { - stop("w must be of length nrow(x).") - } - if (!is.double(w)) { - w <- as.double(w) - } - colSums(x * w) / sum(w) -} - -#' colMeans() for Factors -#' -#' Internal function used to calculate proportions of factor levels. -#' It is less memory-hungry than `colMeans(fdummy(x))`, and much faster. -#' A weighted version via `rowsum(w, x)` is not consistently faster than -#' `wcolMeans(fdummy(x))`, so we currently focus on the non-weighted case. -#' Furthermore, `rowsum()` drops empty factor levels. -#' -#' @noRd -#' @keywords internal -#' -#' @param x Factor-like. -#' @returns Named vector. -colMeans_factor <- function(x) { - x <- as.factor(x) - lev <- levels(x) - out <- tabulate(x, nbins = length(lev)) / length(x) - names(out) <- lev - out + if (is.null(w)) colMeans(x) else colSums(x * w) / sum(w) } #' Grouped Column Means @@ -106,7 +63,7 @@ colMeans_factor <- function(x) { #' @noRd #' @keywords internal #' -#' @param x Vector, factor or matrix-like. +#' @param x Vector or matrix-like. #' @param ngroups Number of groups (`x` was stacked that many times). #' @param w Optional vector with case weights of length `NROW(x) / ngroups`. #' @returns A (g x K) matrix, where g is the number of groups, and K = NCOL(x). @@ -115,51 +72,13 @@ wrowmean <- function(x, ngroups = 1L, w = NULL) { return(t.default(wcolMeans(x, w = w))) } - # Prep w - if (!is.null(w)) { - if (length(w) != NROW(x) %/% ngroups) { - stop("w must be of length NROW(x) / ngroups.") - } - if (!is.double(w)) { - w <- as.double(w) - } - } - - # Very fast case for factors w/o weights and vectors/1d-matrices - if (is.factor(x)) { - if (is.null(w)) { - return(rowmean_factor(x, ngroups = ngroups)) - } - x <- fdummy(x) - } + # Very fast case for 1d structures if (is.vector(x) || (is.matrix(x) && ncol(x) == 1L)) { return(wrowmean_vector(x, ngroups = ngroups, w = w)) } - - # General version wrowmean_matrix(x, ngroups = ngroups, w = w) } -#' (w)rowmean() for Factors (without weights) -#' -#' Grouped `colMeans_factor()` for equal sized groups. -#' -#' @noRd -#' @keywords internal -#' -#' @param x Factor-like. -#' @param ngroups Number of subsequent, equals sized groups. -#' @returns Matrix with column names. -rowmean_factor <- function(x, ngroups = 1L) { - x <- as.factor(x) - lev <- levels(x) - n_bg <- length(x) %/% ngroups - dim(x) <- c(n_bg, ngroups) - out <- t.default(apply(x, 2L, FUN = tabulate, nbins = length(lev))) - colnames(out) <- lev - out / n_bg -} - #' wrowmean() for Vectors #' #' Grouped means over fixed-length groups for vectors or 1d matrices. @@ -170,14 +89,11 @@ rowmean_factor <- function(x, ngroups = 1L) { #' @param x Vector or matrix with one column. #' @param ngroups Number of subsequent, equals sized groups. #' @param w Optional vector of case weights of length `NROW(x) / ngroups`. -#' @returns Matrix. +#' @returns Matrix with one column. 
wrowmean_vector <- function(x, ngroups = 1L, w = NULL) {
-  if (!(is.vector(x) || (is.matrix(x) && ncol(x) == 1L))) {
-    stop("x must be a vector or a single column matrix")
-  }
   nm <- if (is.matrix(x)) colnames(x)
   dim(x) <- c(length(x) %/% ngroups, ngroups)
-  out <- if (is.null(w)) colMeans(x) else colSums(x * w) / sum(w)
+  out <- wcolMeans(x, w = w)
   dim(out) <- c(ngroups, 1L)
   if (!is.null(nm)) {
     colnames(out) <- nm
@@ -202,7 +118,6 @@ wrowmean_matrix <- function(x, ngroups = 1L, w = NULL) {
   }
   n_bg <- nrow(x) %/% ngroups
   g <- rep_each(ngroups, each = n_bg)  # rep(seq_len(ngroups), each = n_bg)
-  # Even faster: cbind(collapse::fmean(x, g = g, w = w))
   if (is.null(w)) {
     out <- rowsum(x, group = g, reorder = FALSE) / n_bg
   } else {
@@ -221,7 +136,7 @@ wrowmean_matrix <- function(x, ngroups = 1L, w = NULL) {
 #' @noRd
 #' @keywords internal
 #'
-#' @param x A vector, factor or matrix-like object.
+#' @param x A vector or matrix-like object.
 #' @param g Optional grouping variable.
 #' @param w Optional case weights.
 #' @returns A list with two elements: "M" represents a matrix of grouped (column)
@@ -236,17 +151,12 @@ gwColMeans <- function(x, g = NULL, w = NULL) {
   }
 
   # Now the interesting case
-  if (is.factor(x)) {
-    x <- fdummy(x)
-  } else if (!is.vector(x) && !is.matrix(x)) {
+  if (!is.vector(x) || !is.matrix(x)) {
     x <- as.matrix(x)
   }
   if (is.null(w)) {
     w <- rep.int(1, NROW(x))
   } else {
-    if (!is.double(w)) {
-      w <- as.double(w)
-    }
     x <- x * w  # w is correctly recycled over columns
   }
   denom <- as.numeric(rowsum(w, group = g))
diff --git a/R/utils_input.R b/R/utils_input.R
index 1f5505aa..10c8eb7a 100644
--- a/R/utils_input.R
+++ b/R/utils_input.R
@@ -115,4 +115,3 @@ prepare_y <- function(y, X) {
   }
   list(y = prepare_pred(y), y_names = y_names)
 }
-

From dbca7266e707a74692eb054a8ffa80ae1f893634 Mon Sep 17 00:00:00 2001
From: Michael Mayer
Date: Fri, 5 Jul 2024 21:37:35 +0200
Subject: [PATCH 3/6] Adapt functions that use response

---
 NEWS.md                               |   2 +-
 R/average_loss.R                      |  15 ++--
 R/losses.R                            | 109 +++++++++++++-------------
 R/perm_importance.R                   |  13 +--
 R/utils_input.R                       |  22 ++++--
 man/average_loss.Rd                   |  12 +--
 man/perm_importance.Rd                |  15 ++--
 tests/testthat/test_average_loss.R    |  42 +---------
 tests/testthat/test_input.R           |   6 +-
 tests/testthat/test_losses.R          |  30 +------
 tests/testthat/test_perm_importance.R |  34 +-------
 11 files changed, 102 insertions(+), 198 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index 9009049c..2ef616b1 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,6 +1,6 @@
 # hstats 1.2.0
 
-# Major changes
+## Major changes
 
 - Factor-valued predictions are no longer possible.
 - Consequently, also removed "classification_error" loss.
diff --git a/R/average_loss.R b/R/average_loss.R
index 2a08510c..a1e322a3 100644
--- a/R/average_loss.R
+++ b/R/average_loss.R
@@ -22,25 +22,19 @@
 #'   or as discrete vector with corresponding levels.
 #'   The latter case is turned into a dummy matrix by a fast version of
 #'   `model.matrix(~ as.factor(y) + 0)`.
-#' - "classification_error": Misclassification error. Both the
-#'   observed values `y` and the predictions can be character/factor. This
-#'   loss function can be used in non-probabilistic classification settings.
-#'   BUT: Probabilistic classification (with "mlogloss") is clearly preferred in most
-#'   situations.
 #' - A function with signature `f(actual, predicted)`, returning a numeric
 #'   vector or matrix of the same length as the input.
 #'
 #' @inheritParams hstats
 #' @param y Vector/matrix of the response, or the corresponding column names in `X`.
#' @param loss One of "squared_error", "logloss", "mlogloss", "poisson", -#' "gamma", "absolute_error", "classification_error". Alternatively, a loss function +#' "gamma", or "absolute_error". Alternatively, a loss function #' can be provided that turns observed and predicted values into a numeric vector or #' matrix of unit losses of the same length as `X`. #' For "mlogloss", the response `y` can either be a dummy matrix or a discrete vector. #' The latter case is handled via a fast version of `model.matrix(~ as.factor(y) + 0)`. -#' For "classification_error", both predictions and responses can be non-numeric. -#' For "squared_error", both predictions and responses can be factors with identical -#' levels. In this case, squared error is evaulated for each one-hot-encoded column. +#' For "squared_error", the response can be a factor with levels in column order of +#' the predictions. In this case, squared error is evaluated for each one-hot-encoded column. #' @param agg_cols Should multivariate losses be summed up? Default is `FALSE`. #' In combination with the squared error loss, `agg_cols = TRUE` gives #' the Brier score for (probabilistic) classification. @@ -116,7 +110,8 @@ average_loss.default <- function(object, X, y, #' @describeIn average_loss Method for "ranger" models. #' @export average_loss.ranger <- function(object, X, y, - pred_fun = function(m, X, ...) stats::predict(m, X, ...)$predictions, + pred_fun = function(m, X, ...) + stats::predict(m, X, ...)$predictions, loss = "squared_error", agg_cols = FALSE, BY = NULL, by_size = 4L, diff --git a/R/losses.R b/R/losses.R index c6b517fe..b207b5ba 100644 --- a/R/losses.R +++ b/R/losses.R @@ -1,3 +1,25 @@ +#' Input Checks for Losses +#' +#' Internal function with general input checks. +#' +#' @noRd +#' @keywords internal +#' +#' @param actual Actual values. +#' @param predicted Predictions. +#' @returns `TRUE` +check_loss <- function(actual, predicted) { + stopifnot( + is.vector(actual) || is.matrix(actual), + is.vector(predicted) || is.matrix(predicted), + is.numeric(actual), + is.numeric(predicted), + NROW(actual) == NROW(predicted), + NCOL(actual) == 1L || NCOL(actual) == NCOL(predicted) + ) + return(TRUE) +} + #' Squared Error Loss #' #' Internal function. Calculates squared error. @@ -5,14 +27,13 @@ #' @noRd #' @keywords internal #' -#' @param actual A numeric vector/matrix, or factor. -#' @param predicted A numeric vector/matrix, or factor. +#' @param actual A numeric vector or matrix. +#' @param predicted A numeric vector or matrix. #' @returns Vector or matrix of numeric losses. loss_squared_error <- function(actual, predicted) { - actual <- drop(prepare_pred(actual, ohe = TRUE)) - predicted <- prepare_pred(predicted, ohe = TRUE) - - (actual - predicted)^2 + check_loss(actual, predicted) + + return((drop(actual) - predicted)^2) } #' Absolute Error Loss @@ -26,13 +47,9 @@ loss_squared_error <- function(actual, predicted) { #' @param predicted A numeric vector/matrix. #' @returns Vector or matrix of numeric losses. loss_absolute_error <- function(actual, predicted) { - actual <- drop(prepare_pred(actual)) - predicted <- prepare_pred(predicted) - if (is.factor(actual) || is.factor(predicted)) { - stop("Absolute loss does not make sense for factors.") - } - - abs(actual - predicted) + check_loss(actual, predicted) + + return(abs(drop(actual) - predicted)) } #' Poisson Deviance Loss @@ -46,20 +63,18 @@ loss_absolute_error <- function(actual, predicted) { #' @param predicted A numeric vector/matrix with non-negative values. 
#' @returns Vector or matrix of numeric losses. loss_poisson <- function(actual, predicted) { - actual <- drop(prepare_pred(actual)) - predicted <- prepare_pred(predicted) - if (is.factor(actual) || is.factor(predicted)) { - stop("Poisson loss does not make sense for factors.") - } + check_loss(actual, predicted) stopifnot( all(predicted >= 0), all(actual >= 0) ) + actual <- drop(actual) + out <- predicted p <- actual > 0 out[p] <- (actual * log(actual / predicted) - (actual - predicted))[p] - 2 * out + return(2 * out) } #' Gamma Deviance Loss @@ -73,34 +88,15 @@ loss_poisson <- function(actual, predicted) { #' @param predicted A numeric vector/matrix with positive values. #' @returns Vector or matrix of numeric losses. loss_gamma <- function(actual, predicted) { - actual <- drop(prepare_pred(actual)) - predicted <- prepare_pred(predicted) - if (is.factor(actual) || is.factor(predicted)) { - stop("Gamma loss does not make sense for factors.") - } + check_loss(actual, predicted) stopifnot( all(predicted > 0), all(actual > 0) ) - -2 * (log(actual / predicted) - (actual - predicted) / predicted) -} - -#' Classification Error Loss -#' -#' Internal function. Calculates per-row misclassification errors. -#' -#' @noRd -#' @keywords internal -#' -#' @param actual A vector/factor/matrix with discrete values. -#' @param predicted A vector/factor/matrix with discrete values. -#' @returns Vector or matrix of numeric losses. -loss_classification_error <- function(actual, predicted) { - actual <- drop(prepare_pred(actual)) - predicted <- prepare_pred(predicted) + actual <- drop(actual) - (actual != predicted) * 1.0 + return(-2 * (log(actual / predicted) - (actual - predicted) / predicted)) } #' Log Loss @@ -115,11 +111,7 @@ loss_classification_error <- function(actual, predicted) { #' @param predicted A numeric vector/matrix with values between 0 and 1. #' @returns Vector or matrix of numeric losses. loss_logloss <- function(actual, predicted) { - actual <- drop(prepare_pred(actual)) - predicted <- prepare_pred(predicted) - if (is.factor(actual) || is.factor(predicted)) { - stop("Log loss does not make sense for factors.") - } + check_loss(actual, predicted) stopifnot( all(predicted >= 0), all(predicted <= 1), @@ -127,7 +119,9 @@ loss_logloss <- function(actual, predicted) { all(actual <= 1) ) - -xlogy(actual, predicted) - xlogy(1 - actual, 1 - predicted) + actual <- drop(actual) + + return(-xlogy(actual, predicted) - xlogy(1 - actual, 1 - predicted)) } #' Multi-Column Log Loss @@ -139,28 +133,34 @@ loss_logloss <- function(actual, predicted) { #' @keywords internal #' #' @param actual A numeric matrix with values between 0 and 1, or a -#' discrete vector that will be one-hot-encoded by a fast version of -#' `model.matrix(~ as.factor(actual) + 0)`. +#' factor, or a discrete numeric vector that will be one-hot-encoded by a +#' fast version of `model.matrix(~ as.factor(actual) + 0)`. #' The column order of `predicted` must be in line with this! #' @param predicted A numeric matrix with values between 0 and 1. -#' @returns `TRUE` (or an error message). +#' @returns A numeric vector of losses. 
loss_mlogloss <- function(actual, predicted) { - actual <- prepare_pred(actual) - predicted <- prepare_pred(predicted) if (NCOL(actual) == 1L) { # not only for factors actual <- fdummy(actual) } + stopifnot( + is.matrix(actual), is.matrix(predicted), - ncol(predicted) >= 2L, + + is.numeric(actual), + is.numeric(predicted), + + nrow(actual) == nrow(predicted), ncol(actual) == ncol(predicted), + ncol(predicted) >= 2L, + all(predicted >= 0), all(predicted <= 1), all(actual >= 0), all(actual <= 1) ) - unname(-rowSums(xlogy(actual, predicted))) + return(unname(-rowSums(xlogy(actual, predicted)))) } #' Calculates x*log(y) @@ -198,7 +198,6 @@ get_loss_fun <- function(loss) { poisson = loss_poisson, gamma = loss_gamma, absolute_error = loss_absolute_error, - classification_error = loss_classification_error, stop("Unknown loss function.") ) } diff --git a/R/perm_importance.R b/R/perm_importance.R index 3c4d5d99..5df1b02e 100644 --- a/R/perm_importance.R +++ b/R/perm_importance.R @@ -2,7 +2,7 @@ #' #' Calculates permutation importance for a set of features or a set of feature groups. #' By default, importance is calculated for all columns in `X` (except column names -#' used as response `y` or case weight `w`). +#' used as response `y` or as case weight `w`). #' #' The permutation importance of a feature is defined as the increase in the average #' loss when shuffling the corresponding feature values before calculating predictions. @@ -31,7 +31,7 @@ #' # MODEL 1: Linear regression #' fit <- lm(Sepal.Length ~ ., data = iris) #' s <- perm_importance(fit, X = iris, y = "Sepal.Length") - +#' #' s #' s$M #' s$SE # Standard errors are available thanks to repeated shuffling @@ -103,7 +103,7 @@ perm_importance.default <- function(object, X, y, v = NULL, if (nrow(X) > n_max) { ix <- sample(nrow(X), n_max) X <- X[ix, , drop = FALSE] - if (is.vector(y) || is.factor(y)) { + if (is.vector(y)) { y <- y[ix] } else { y <- y[ix, , drop = FALSE] @@ -126,10 +126,10 @@ perm_importance.default <- function(object, X, y, v = NULL, if (m_rep > 1L) { ind <- rep.int(seq_len(n), m_rep) X <- rep_rows(X, ind) - if (is.vector(y) || is.factor(y)) { + if (is.vector(y)) { y <- y[ind] } else { - y <- rep_rows(y, ind) + y <- y[ind, , drop = FALSE] } } @@ -206,7 +206,8 @@ perm_importance.default <- function(object, X, y, v = NULL, #' @describeIn perm_importance Method for "ranger" models. #' @export perm_importance.ranger <- function(object, X, y, v = NULL, - pred_fun = function(m, X, ...) stats::predict(m, X, ...)$predictions, + pred_fun = function(m, X, ...) + stats::predict(m, X, ...)$predictions, loss = "squared_error", m_rep = 4L, agg_cols = FALSE, normalize = FALSE, n_max = 10000L, diff --git a/R/utils_input.R b/R/utils_input.R index 10c8eb7a..d3d162f0 100644 --- a/R/utils_input.R +++ b/R/utils_input.R @@ -11,7 +11,7 @@ prepare_pred <- function(x) { if (is.data.frame(x) && ncol(x) == 1L) { x <- x[[1L]] } - if (!is.vector(x) || !is.matrix(x)) { + if (!is.vector(x) && !is.matrix(x)) { x <- as.matrix(x) } if (!is.numeric(x)) { @@ -95,11 +95,11 @@ prepare_w <- function(w, X) { #' #' @noRd #' @keywords internal -#' @param y Vector/matrix-like of the same length as `X`, or column names in `X`. +#' @param y Vector, factor, or matrix-like of the same length as `X`, +#' or column names in `X`. #' @param X Matrix-like. -#' @param ohe If y is a factor: should it be one-hot encoded? Default is `FALSE`. 
#' -#' @returns A list with "y" (vector, matrix, or factor) and "y_names" (if `y` +#' @returns A list with "y" (vector, matrix) and "y_names" (if `y` #' was passed as column names). prepare_y <- function(y, X) { if (NROW(y) < nrow(X) && all(y %in% colnames(X))) { @@ -110,8 +110,20 @@ prepare_y <- function(y, X) { y <- X[, y] } } else { + if (is.data.frame(y) && ncol(y) == 1L) { + y <- y[[1L]] + } stopifnot(NROW(y) == nrow(X)) y_names <- NULL } - list(y = prepare_pred(y), y_names = y_names) + if (is.factor(y)) { + y <- fdummy(y) + } + if (!is.vector(y) && !is.matrix(y)) { + y <- as.matrix(y) + } + if (!is.numeric(y)) { + stop("Response must be numeric (or factor.)") + } + list(y = y, y_names = y_names) } diff --git a/man/average_loss.Rd b/man/average_loss.Rd index abac4d7a..01af2a54 100644 --- a/man/average_loss.Rd +++ b/man/average_loss.Rd @@ -67,14 +67,13 @@ model) can be passed via \code{...}. The default, \code{\link[stats:predict]{sta most cases.} \item{loss}{One of "squared_error", "logloss", "mlogloss", "poisson", -"gamma", "absolute_error", "classification_error". Alternatively, a loss function +"gamma", or "absolute_error". Alternatively, a loss function can be provided that turns observed and predicted values into a numeric vector or matrix of unit losses of the same length as \code{X}. For "mlogloss", the response \code{y} can either be a dummy matrix or a discrete vector. The latter case is handled via a fast version of \code{model.matrix(~ as.factor(y) + 0)}. -For "classification_error", both predictions and responses can be non-numeric. -For "squared_error", both predictions and responses can be factors with identical -levels. In this case, squared error is evaulated for each one-hot-encoded column.} +For "squared_error", the response can be a factor with levels in column order of +the predictions. In this case, squared error is evaluated for each one-hot-encoded column.} \item{agg_cols}{Should multivariate losses be summed up? Default is \code{FALSE}. In combination with the squared error loss, \code{agg_cols = TRUE} gives @@ -136,11 +135,6 @@ The observed values \code{y} are either passed as (n x K) dummy matrix, or as discrete vector with corresponding levels. The latter case is turned into a dummy matrix by a fast version of \code{model.matrix(~ as.factor(y) + 0)}. -\item "classification_error": Misclassification error. Both the -observed values \code{y} and the predictions can be character/factor. This -loss function can be used in non-probabilistic classification settings. -BUT: Probabilistic classification (with "mlogloss") is clearly preferred in most -situations. \item A function with signature \code{f(actual, predicted)}, returning a numeric vector or matrix of the same length as the input. } diff --git a/man/perm_importance.Rd b/man/perm_importance.Rd index d47df3f9..af27a2a9 100644 --- a/man/perm_importance.Rd +++ b/man/perm_importance.Rd @@ -80,14 +80,13 @@ model) can be passed via \code{...}. The default, \code{\link[stats:predict]{sta most cases.} \item{loss}{One of "squared_error", "logloss", "mlogloss", "poisson", -"gamma", "absolute_error", "classification_error". Alternatively, a loss function +"gamma", or "absolute_error". Alternatively, a loss function can be provided that turns observed and predicted values into a numeric vector or matrix of unit losses of the same length as \code{X}. For "mlogloss", the response \code{y} can either be a dummy matrix or a discrete vector. 
The latter case is handled via a fast version of \code{model.matrix(~ as.factor(y) + 0)}. -For "classification_error", both predictions and responses can be non-numeric. -For "squared_error", both predictions and responses can be factors with identical -levels. In this case, squared error is evaulated for each one-hot-encoded column.} +For "squared_error", the response can be a factor with levels in column order of +the predictions. In this case, squared error is evaluated for each one-hot-encoded column.} \item{m_rep}{Number of permutations (default 4).} @@ -122,7 +121,7 @@ Currently, supported only for \code{\link[=perm_importance]{perm_importance()}}. \description{ Calculates permutation importance for a set of features or a set of feature groups. By default, importance is calculated for all columns in \code{X} (except column names -used as response \code{y} or case weight \code{w}). +used as response \code{y} or as case weight \code{w}). } \details{ The permutation importance of a feature is defined as the increase in the average @@ -161,11 +160,6 @@ The observed values \code{y} are either passed as (n x K) dummy matrix, or as discrete vector with corresponding levels. The latter case is turned into a dummy matrix by a fast version of \code{model.matrix(~ as.factor(y) + 0)}. -\item "classification_error": Misclassification error. Both the -observed values \code{y} and the predictions can be character/factor. This -loss function can be used in non-probabilistic classification settings. -BUT: Probabilistic classification (with "mlogloss") is clearly preferred in most -situations. \item A function with signature \code{f(actual, predicted)}, returning a numeric vector or matrix of the same length as the input. } @@ -175,6 +169,7 @@ vector or matrix of the same length as the input. 
# MODEL 1: Linear regression fit <- lm(Sepal.Length ~ ., data = iris) s <- perm_importance(fit, X = iris, y = "Sepal.Length") + s s$M s$SE # Standard errors are available thanks to repeated shuffling diff --git a/tests/testthat/test_average_loss.R b/tests/testthat/test_average_loss.R index b7d58998..a6a7786d 100644 --- a/tests/testthat/test_average_loss.R +++ b/tests/testthat/test_average_loss.R @@ -219,10 +219,10 @@ test_that("average_loss() works with weights and grouped (multi)", { expect_equal(s3, s4) }) -test_that("mlogloss works with either matrix y or vector y", { +test_that("mlogloss works with either matrix y or factor y", { pf <- function(m, X) cbind(1 - (0.1 + 0.7 * (X == 1)), 0.1 + 0.7 * (X == 1)) X <- cbind(z = c(1, 0, 0, 1, 1)) - y <- c("B", "A", "B", "B", "B") + y <- factor(c("B", "A", "B", "B", "B")) Y <- model.matrix(~ y + 0) s1 <- average_loss(NULL, X = X, y = Y, loss = "mlogloss", pred_fun = pf) s2 <- average_loss(NULL, X = X, y = y, loss = "mlogloss", pred_fun = pf) @@ -230,7 +230,7 @@ test_that("mlogloss works with either matrix y or vector y", { }) test_that("loss_mlogloss() is in line with loss_logloss() in binary case", { - y <- iris$Species == "setosa" + y <- (iris$Species == "setosa") * 1 Y <- cbind(no = 1 - y, yes = y) fit <- glm(y ~ Sepal.Length, data = iris, family = binomial()) pf <- function(m, X, multi = FALSE) { @@ -245,42 +245,6 @@ test_that("loss_mlogloss() is in line with loss_logloss() in binary case", { ) }) -test_that("classification_error works on factors", { - pf <- function(m, X) rep("setosa", times = nrow(X)) - expect_error(average_loss(1, X = iris, y = iris$Species, pred_fun = pf)) - expect_equal( - c(average_loss( - 1, X = iris, y = iris$Species, pred_fun = pf, loss = "classification_error" - )$M), - 2/3 - ) - expect_equal( - c(average_loss( - 1, - X = iris, - y = iris$Species, - pred_fun = pf, - loss = "classification_error", - BY = iris$Species - )$M), - c(0, 1, 1) - ) -}) - -test_that("factor predictions work as well (squared error)", { - expect_equal( - average_loss( - 1, - X = iris, - y = iris$Species, - pred_fun = function(m, X) - factor(rep("setosa", times = nrow(X)), levels = levels(iris$Species)), - agg_cols = TRUE - )$M, - matrix((0 + 2 + 2) / 3) - ) -}) - test_that("average_loss() works with missing values", { X <- data.frame(x1 = 1:6, x2 = c(NA, 1, 2, 1, 1, 3), x3 = factor(c("A", NA, NA, "B", "A", "A"))) y <- 1:6 diff --git a/tests/testthat/test_input.R b/tests/testthat/test_input.R index 274e4b55..f8a19102 100644 --- a/tests/testthat/test_input.R +++ b/tests/testthat/test_input.R @@ -1,6 +1,6 @@ test_that("prepare_pred() works", { expect_equal(prepare_pred(iris[1:4]), data.matrix(iris[1:4])) - expect_equal(prepare_pred(iris["Species"]), iris$Species) + expect_error(prepare_pred(iris["Species"])) expect_equal(prepare_pred(iris$Sepal.Width), iris$Sepal.Width) expect_equal(prepare_pred(iris["Sepal.Width"]), iris$Sepal.Width) }) @@ -24,7 +24,7 @@ test_that("prepare_by() works", { test_that("prepare_y() works", { # "Vector" interface expect_equal(prepare_y(iris[1:4], X = iris)$y, data.matrix(iris[1:4])) - expect_equal(prepare_y(iris["Species"], X = iris)$y, iris$Species) + expect_equal(prepare_y(iris["Species"], X = iris)$y, fdummy(iris$Species)) expect_equal(prepare_y(iris$Sepal.Width, X = iris)$y, iris$Sepal.Width) expect_equal(prepare_y(iris["Sepal.Width"], X = iris)$y, iris$Sepal.Width) @@ -35,7 +35,7 @@ test_that("prepare_y() works", { expect_equal(out$y_names, cn) out <- prepare_y("Species", X = iris) - expect_equal(out$y, 
iris$Species) + expect_equal(out$y, fdummy(iris$Species)) expect_equal(out$y_names, "Species") out <- prepare_y("Sepal.Width", X = iris) diff --git a/tests/testthat/test_losses.R b/tests/testthat/test_losses.R index e7bf3c23..72ca51c9 100644 --- a/tests/testthat/test_losses.R +++ b/tests/testthat/test_losses.R @@ -54,7 +54,8 @@ test_that("losses are identical to stats package for specific case (multivariate expect_equal(loss_gamma(y, p), Gamma()$dev.resid(y2, p, 1)) y <- 0 - y2 <- replicate(2, y) + y2 <- cbind(y, y) + colnames(y2) <- NULL p <- rbind(replicate(2, 0.1)) expect_equal(loss_poisson(y2, p), poisson()$dev.resid(y2, p, 1)) @@ -92,26 +93,6 @@ test_that("loss_absolute_error() works for specific case (multivariate)", { expect_equal(loss_absolute_error(y, p), abs(y2 - p)) }) -test_that("loss_classification_error() works for specific case", { - y <- iris$Species - p <- rev(iris$Species) - expect_equal(loss_classification_error(y, p), 0 + (y != p)) - - # Single input - y <- y[1L] - p <- rev(iris$Species)[1L] - expect_equal(loss_classification_error(y, p), 0 + (y != p)) -}) - -test_that("loss_classification_error() works for specific case (multivariate)", { - y <- iris$Species - y2 <- replicate(2L, y) - p <- y2[nrow(y2):1, ] - colnames(p) <- c("a", "b") - expect_equal(loss_classification_error(y2, p), 0 + (y2 != p)) - expect_equal(loss_classification_error(y, p), 0 + (y2 != p)) -}) - test_that("loss_mlogloss() is in line with loss_logloss() in binary case", { y <- c(0, 0, 0, 1, 1) pred <- c(0, 0.1, 0.2, 1, 0.9) @@ -135,13 +116,6 @@ test_that("loss_mlogloss() either understands matrix responses or factors", { expect_equal(loss_mlogloss(Y, pred), loss_mlogloss(y, pred)) }) -test_that("squared error can be calculated on factors", { - expect_equal( - colSums(loss_squared_error(iris$Species, rev(iris$Species))), - c(setosa = 100, versicolor = 0, virginica = 100) - ) -}) - test_that("Some errors are thrown", { expect_error(loss_poisson(-1:1, 1:2)) expect_error(loss_poisson(0:1, -1:0)) diff --git a/tests/testthat/test_perm_importance.R b/tests/testthat/test_perm_importance.R index 6fe356bb..3cd45085 100644 --- a/tests/testthat/test_perm_importance.R +++ b/tests/testthat/test_perm_importance.R @@ -155,36 +155,6 @@ test_that("matrix case works as well", { expect_equal(s2$M["Petal.Length", ], s3$M["Petal.Length", ]) }) -test_that("non-numeric predictions can work as well (classification error)", { - expect_equal( - c(perm_importance( - 1, - v = "Sepal.Length", - X = iris, - y = iris$Species, - pred_fun = function(m, X) rep("setosa", times = nrow(X)), - loss = "classification_error", - verbose = FALSE - )$M), - 0 - ) -}) - -test_that("factor predictions can work as well (squared error)", { - expect_equal( - perm_importance( - 1, - v = "Sepal.Length", - X = iris, - y = iris$Species, - pred_fun = function(m, X) - factor(rep("setosa", times = nrow(X)), levels = levels(iris$Species)), - verbose = FALSE - )$M, - rbind(Sepal.Length = c(setosa = 0, versicolor = 0, virginica = 0)) - ) -}) - #================================================ # Multivariate model #================================================ @@ -356,7 +326,7 @@ test_that("groups of variables work as well", { test_that("mlogloss works with either matrix y or vector y", { pred_fun <- function(m, X) cbind(1 - (0.1 + 0.7 * (X == 1)), 0.1 + 0.7 * (X == 1)) X <- cbind(z = c(1, 0, 0, 1, 1)) - y <- c("B", "A", "B", "B", "B") + y <- c(2, 1, 2, 2, 2) Y <- model.matrix(~ y + 0) set.seed(1L) s1 <- perm_importance( @@ -389,7 +359,7 @@ 
test_that("Single output multiple models works without recycling y", { }) test_that("loss_mlogloss() is in line with loss_logloss() in binary case", { - y <- iris$Species == "setosa" + y <- (iris$Species == "setosa") * 1 Y <- cbind(no = 1 - y, yes = y) fit <- glm(y ~ Sepal.Length, data = iris, family = binomial()) pf <- function(m, X, multi = FALSE) { From 79601884382825e29ca591123711fee64b58fa0d Mon Sep 17 00:00:00 2001 From: Michael Mayer Date: Fri, 5 Jul 2024 21:52:01 +0200 Subject: [PATCH 4/6] Update README --- README.md | 35 ++--------------------------------- 1 file changed, 2 insertions(+), 33 deletions(-) diff --git a/README.md b/README.md index 342ed982..0da5a4a7 100644 --- a/README.md +++ b/README.md @@ -369,33 +369,6 @@ plot(H, normalize = FALSE, squared = FALSE, facet_scales = "free_y", ncol = 1) ![](man/figures/xgboost.svg) -### Non-probabilistic classification - -When predictions are factor levels, {hstats} uses internal one-hot-encoding. Usually, probabilistic classification makes more sense though. - -```r -library(ranger) - -set.seed(1) - -fit <- ranger(Species ~ ., data = train) -average_loss(fit, X = valid, y = "Species", loss = "classification_error") # 0 - -perm_importance(fit, X = valid, y = "Species", loss = "classification_error") -# Petal.Width Petal.Length Sepal.Length Sepal.Width -# 0.350 0.275 0.000 0.000 - -(s <- hstats(fit, X = train[, -5])) -# H^2 (normalized) -# setosa versicolor virginica -# 0.09885417 0.46151300 0.23238800 - -plot(s, normalize = FALSE, squared = FALSE) - -partial_dep(fit, v = "Petal.Length", X = train, BY = "Petal.Width") |> - plot(show_points = FALSE) -``` - ## Meta-learning packages Here, we provide examples for {tidymodels}, {caret}, and {mlr3}. @@ -469,18 +442,14 @@ set.seed(1) task_iris <- TaskClassif$new(id = "class", backend = iris, target = "Species") fit_rf <- lrn("classif.ranger", predict_type = "prob") fit_rf$train(task_iris) -s <- hstats(fit_rf, X = iris[, -5]) +s <- hstats(fit_rf, X = iris[, -5], predict_type = "prob") plot(s) -# Permutation importance (probabilistic using multi-logloss) +# Permutation importance (wrt multi-logloss) p <- perm_importance( fit_rf, X = iris, y = "Species", loss = "mlogloss", predict_type = "prob" ) plot(p) - -# Non-probabilistic using classification error -perm_importance(fit_rf, X = iris, y = "Species", loss = "classification_error") |> - plot() ``` ## Background From f003dc3ab9593a558dab24729eaa89b3141db0a1 Mon Sep 17 00:00:00 2001 From: Michael Mayer Date: Sat, 6 Jul 2024 18:11:41 +0200 Subject: [PATCH 5/6] Delegate OHE of response to losses --- NEWS.md | 4 ++ R/losses.R | 18 +++--- R/perm_importance.R | 8 +-- R/utils_input.R | 9 +-- backlog/benchmark.R | 80 ++++++++++++++------------- backlog/colMeans_factors.R | 27 --------- tests/testthat/test_average_loss.R | 2 +- tests/testthat/test_input.R | 4 +- tests/testthat/test_perm_importance.R | 2 +- 9 files changed, 68 insertions(+), 86 deletions(-) delete mode 100644 backlog/colMeans_factors.R diff --git a/NEWS.md b/NEWS.md index 2ef616b1..7465a113 100644 --- a/NEWS.md +++ b/NEWS.md @@ -5,6 +5,10 @@ - Factor-valued predictions are no longer possible. - Consequently, also removed "classification_error" loss. +## Minor changes + +- Code simplifications. 
+ # hstats 1.1.2 ## ICE plots diff --git a/R/losses.R b/R/losses.R index b207b5ba..f9b40de1 100644 --- a/R/losses.R +++ b/R/losses.R @@ -12,8 +12,8 @@ check_loss <- function(actual, predicted) { stopifnot( is.vector(actual) || is.matrix(actual), is.vector(predicted) || is.matrix(predicted), - is.numeric(actual), - is.numeric(predicted), + is.numeric(actual) || is.logical(actual), + is.numeric(predicted) || is.logical(predicted), NROW(actual) == NROW(predicted), NCOL(actual) == 1L || NCOL(actual) == NCOL(predicted) ) @@ -27,10 +27,14 @@ check_loss <- function(actual, predicted) { #' @noRd #' @keywords internal #' -#' @param actual A numeric vector or matrix. +#' @param actual A numeric vector or matrix, or a factor with levels in the same order +#' as the column names of `predicted`. #' @param predicted A numeric vector or matrix. #' @returns Vector or matrix of numeric losses. loss_squared_error <- function(actual, predicted) { + if (is.factor(actual)) { + actual <- fdummy(actual) + } check_loss(actual, predicted) return((drop(actual) - predicted)^2) @@ -147,11 +151,10 @@ loss_mlogloss <- function(actual, predicted) { is.matrix(actual), is.matrix(predicted), - is.numeric(actual), - is.numeric(predicted), + is.numeric(actual) || is.logical(actual), + is.numeric(predicted) || is.logical(predicted), - nrow(actual) == nrow(predicted), - ncol(actual) == ncol(predicted), + dim(actual) == dim(predicted), ncol(predicted) >= 2L, all(predicted >= 0), @@ -176,6 +179,7 @@ loss_mlogloss <- function(actual, predicted) { xlogy <- function(x, y) { out <- x * log(y) out[x == 0] <- 0 + return(out) } diff --git a/R/perm_importance.R b/R/perm_importance.R index 5df1b02e..20fdcfb5 100644 --- a/R/perm_importance.R +++ b/R/perm_importance.R @@ -103,9 +103,9 @@ perm_importance.default <- function(object, X, y, v = NULL, if (nrow(X) > n_max) { ix <- sample(nrow(X), n_max) X <- X[ix, , drop = FALSE] - if (is.vector(y)) { + if (is.vector(y) || is.factor(y)) { y <- y[ix] - } else { + } else { # matrix case y <- y[ix, , drop = FALSE] } if (!is.null(w)) { @@ -126,9 +126,9 @@ perm_importance.default <- function(object, X, y, v = NULL, if (m_rep > 1L) { ind <- rep.int(seq_len(n), m_rep) X <- rep_rows(X, ind) - if (is.vector(y)) { + if (is.vector(y) || is.factor(y)) { y <- y[ind] - } else { + } else { # matrix case y <- y[ind, , drop = FALSE] } } diff --git a/R/utils_input.R b/R/utils_input.R index d3d162f0..d0845e36 100644 --- a/R/utils_input.R +++ b/R/utils_input.R @@ -14,7 +14,7 @@ prepare_pred <- function(x) { if (!is.vector(x) && !is.matrix(x)) { x <- as.matrix(x) } - if (!is.numeric(x)) { + if (!is.numeric(x) && !is.logical(x)) { stop("Predictions must be numeric!") } return(x) @@ -116,13 +116,10 @@ prepare_y <- function(y, X) { stopifnot(NROW(y) == nrow(X)) y_names <- NULL } - if (is.factor(y)) { - y <- fdummy(y) - } - if (!is.vector(y) && !is.matrix(y)) { + if (!is.vector(y) && !is.matrix(y) && !is.factor(y)) { y <- as.matrix(y) } - if (!is.numeric(y)) { + if (!is.numeric(y) && !is.logical(y) && !is.factor(y)) { stop("Response must be numeric (or factor.)") } list(y = y, y_names = y_names) diff --git a/backlog/benchmark.R b/backlog/benchmark.R index c367b679..3923a1c9 100644 --- a/backlog/benchmark.R +++ b/backlog/benchmark.R @@ -115,12 +115,11 @@ bench::mark( check = FALSE, min_iterations = 3 ) - -# expression min median `itr/sec` mem_alloc `gc/sec` n_itr n_gc total_time -# 1 iml 1.72s 1.75s 0.574 210.6MB 1.34 3 7 5.23s -# 2 dalex 744.82ms 760.02ms 1.31 35.2MB 0.877 3 2 2.28s -# 3 flashlight 1.29s 1.35s 0.742 
63MB 0.990 3 4 4.04s -# 4 hstats 407.26ms 412.31ms 2.43 26.5MB 0 3 0 1.23s +# expression min median `itr/sec` mem_alloc `gc/sec` n_itr n_gc total_time result +# 1 iml 1.76s 1.76s 0.565 211.6MB 3.39 3 18 5.31s +# 2 dalex 688.54ms 697.71ms 1.44 35.2MB 1.91 3 4 2.09s +# 3 flashlight 667.51ms 676.07ms 1.47 28.1MB 1.96 3 4 2.04s +# 4 hstats 392.15ms 414.41ms 2.39 26.6MB 0.796 3 1 1.26s # Partial dependence (cont) v <- "tot_lvg_area" @@ -132,12 +131,12 @@ bench::mark( check = FALSE, min_iterations = 3 ) -# expression min median `itr/sec` mem_alloc `gc/sec` n_itr n_gc total_time -# 1 iml 1.14s 1.16s 0.861 376.7MB 3.73 3 13 3.48s -# 2 dalex 653.24ms 654.51ms 1.35 192.8MB 2.24 3 5 2.23s -# 3 flashlight 352.34ms 361.79ms 2.72 66.7MB 0.906 3 1 1.1s -# 4 hstats 239.03ms 242.79ms 4.04 14.2MB 1.35 3 1 743.43ms - +# expression min median `itr/sec` mem_alloc `gc/sec` n_itr n_gc total_time result +# +# 1 iml 1.2s 1.4s 0.726 376.9MB 4.12 3 17 4.13s +# 2 dalex 759.3ms 760.6ms 1.28 192.8MB 2.55 3 6 2.35s +# 3 flashlight 369.1ms 403.1ms 2.55 66.8MB 2.55 3 3 1.18s +# 4 hstats 242.1ms 243.8ms 4.03 14.2MB 0 3 0 744.25ms # # Partial dependence (discrete) v <- "structure_quality" bench::mark( @@ -148,30 +147,31 @@ bench::mark( check = FALSE, min_iterations = 3 ) -# expression min median `itr/sec` mem_alloc `gc/sec` n_itr n_gc total_time -# 1 iml 100.6ms 103.6ms 9.46 13.34MB 0 5 0 529ms -# 2 dalex 172.4ms 177.9ms 5.62 20.55MB 2.81 2 1 356ms -# 3 flashlight 43.5ms 45.5ms 21.9 6.36MB 2.19 10 1 457ms -# 4 hstats 25.3ms 25.8ms 37.9 1.54MB 2.10 18 1 475ms - +# expression min median `itr/sec` mem_alloc `gc/sec` n_itr n_gc total_time result +# +# 1 iml 107.9ms 108ms 9.26 13.64MB 9.26 2 2 216ms +# 2 dalex 172ms 172.2ms 5.81 21.14MB 2.90 2 1 344ms +# 3 flashlight 40.3ms 41.6ms 23.8 8.61MB 2.16 11 1 462ms +# 4 hstats 24.5ms 25.9ms 35.5 1.64MB 0 18 0 507ms + # H-Stats -> we use a subset of 500 rows X_v500 <- X_valid[1:500, ] mod500 <- Predictor$new(fit, data = as.data.frame(X_v500), predict.function = predf) fl500 <- flashlight(fl, data = as.data.frame(valid[1:500, ])) -# iml # 225s total, using slow exact calculations -system.time( # 90s +# iml # 243s total, using slow exact calculations +system.time( # 110s iml_overall <- Interaction$new(mod500, grid.size = 500) ) -system.time( # 135s for all combinations of latitude +system.time( # 133s for all combinations of latitude iml_pairwise <- Interaction$new(mod500, grid.size = 500, feature = "latitude") ) -# flashlight: 13s total, doing only one pairwise calculation, otherwise would take 63s -system.time( # 11.5s +# flashlight: 14s total, doing only one pairwise calculation, otherwise would take 63s +system.time( # 11.7s fl_overall <- light_interaction(fl500, v = x, grid_size = Inf, n_max = Inf) ) -system.time( # 2.4s +system.time( # 2.3s fl_pairwise <- light_interaction( fl500, v = coord, grid_size = Inf, n_max = Inf, pairwise = TRUE ) @@ -185,34 +185,38 @@ system.time({ } ) -# Using 50 quantiles to approximate dense numerics: 0.9s +# Using 50 quantiles to approximate dense numerics: 0.8s system.time( H_approx <- hstats(fit, v = x, X = X_v500, n_max = Inf, approx = TRUE) ) # Overall statistics correspond exactly -iml_overall$results |> filter(.interaction > 1e-6) +iml_overall$results |> + filter(.interaction > 1e-6) # .feature .interaction -# 1: latitude 0.2791144 -# 2: longitude 0.2791144 +# 1: latitude 0.2458269 +# 2: longitude 0.2458269 -fl_overall$data |> subset(value > 0, select = c(variable, value)) -# variable value -# 1 latitude 0.279 -# 2 longitude 0.279 +fl_overall$data 
|> + subset(value_ > 0, select = c(variable_, value_)) +# variable_ value_ +# 3 latitude 0.2458269 +# 4 longitude 0.2458269 hstats_overall # longitude latitude -# 0.2791144 0.2791144 +# 0.2458269 0.2458269 # Pairwise results match as well -iml_pairwise$results |> filter(.interaction > 1e-6) +iml_pairwise$results |> + filter(.interaction > 1e-6) # .feature .interaction -# 1: longitude:latitude 0.4339574 +# 1: longitude:latitude 0.3942526 -fl_pairwise$data |> subset(value > 0, select = c(variable, value)) -# latitude:longitude 0.434 +fl_pairwise$data |> + subset(value_ > 0, select = c(variable_, value_)) +# latitude:longitude 0.3942526 hstats_pairwise # latitude:longitude -# 0.4339574 \ No newline at end of file +# 0.3942526 \ No newline at end of file diff --git a/backlog/colMeans_factors.R b/backlog/colMeans_factors.R deleted file mode 100644 index adcc54c5..00000000 --- a/backlog/colMeans_factors.R +++ /dev/null @@ -1,27 +0,0 @@ -#' gcolMeans() for Factors -#' -#' Grouped version of `colMeans_factor()`. -#' -#' @noRd -#' @keywords internal -#' -#' @params x Factor. -#' @returns Named vector. -gcolMeans_factor <- function(x, g = NULL) { - if (is.null(g)) { - colMeans_factor(x) - } - x <- as.factor(x) - out <- t.default(sapply(split.default(x, g), colMeans_factor)) - colnames(out) <- levels(x) - out -} - -wrowmean_matrix2 <- function(x, ngroups = 1L, w = NULL) { - if (!is.matrix(x)) { - stop("x must be a matrix.") - } - dim(x) <- c(nrow(x)/ngroups, ngroups, ncol(x)) - out <- colMeans(aperm(x, c(1, 3, 2))) - t.default(out) -} diff --git a/tests/testthat/test_average_loss.R b/tests/testthat/test_average_loss.R index a6a7786d..fd8d1c6f 100644 --- a/tests/testthat/test_average_loss.R +++ b/tests/testthat/test_average_loss.R @@ -230,7 +230,7 @@ test_that("mlogloss works with either matrix y or factor y", { }) test_that("loss_mlogloss() is in line with loss_logloss() in binary case", { - y <- (iris$Species == "setosa") * 1 + y <- (iris$Species == "setosa") Y <- cbind(no = 1 - y, yes = y) fit <- glm(y ~ Sepal.Length, data = iris, family = binomial()) pf <- function(m, X, multi = FALSE) { diff --git a/tests/testthat/test_input.R b/tests/testthat/test_input.R index f8a19102..3bb5616e 100644 --- a/tests/testthat/test_input.R +++ b/tests/testthat/test_input.R @@ -24,7 +24,7 @@ test_that("prepare_by() works", { test_that("prepare_y() works", { # "Vector" interface expect_equal(prepare_y(iris[1:4], X = iris)$y, data.matrix(iris[1:4])) - expect_equal(prepare_y(iris["Species"], X = iris)$y, fdummy(iris$Species)) + expect_equal(prepare_y(iris["Species"], X = iris)$y, iris$Species) expect_equal(prepare_y(iris$Sepal.Width, X = iris)$y, iris$Sepal.Width) expect_equal(prepare_y(iris["Sepal.Width"], X = iris)$y, iris$Sepal.Width) @@ -35,7 +35,7 @@ test_that("prepare_y() works", { expect_equal(out$y_names, cn) out <- prepare_y("Species", X = iris) - expect_equal(out$y, fdummy(iris$Species)) + expect_equal(out$y, iris$Species) expect_equal(out$y_names, "Species") out <- prepare_y("Sepal.Width", X = iris) diff --git a/tests/testthat/test_perm_importance.R b/tests/testthat/test_perm_importance.R index 3cd45085..15247a83 100644 --- a/tests/testthat/test_perm_importance.R +++ b/tests/testthat/test_perm_importance.R @@ -359,7 +359,7 @@ test_that("Single output multiple models works without recycling y", { }) test_that("loss_mlogloss() is in line with loss_logloss() in binary case", { - y <- (iris$Species == "setosa") * 1 + y <- (iris$Species == "setosa") Y <- cbind(no = 1 - y, yes = y) fit <- glm(y ~ Sepal.Length, 
data = iris, family = binomial()) pf <- function(m, X, multi = FALSE) { From 2278089e432b1e00ffd00eed8253fc0830fc5dfa Mon Sep 17 00:00:00 2001 From: Michael Mayer Date: Sun, 7 Jul 2024 10:00:46 +0200 Subject: [PATCH 6/6] Review of PR --- R/utils_calculate.R | 2 +- R/utils_input.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/utils_calculate.R b/R/utils_calculate.R index a1f5620c..8ee58827 100644 --- a/R/utils_calculate.R +++ b/R/utils_calculate.R @@ -151,7 +151,7 @@ gwColMeans <- function(x, g = NULL, w = NULL) { } # Now the interesting case - if (!is.vector(x) || !is.matrix(x)) { + if (!is.vector(x) && !is.matrix(x)) { x <- as.matrix(x) } if (is.null(w)) { diff --git a/R/utils_input.R b/R/utils_input.R index d0845e36..f46257c6 100644 --- a/R/utils_input.R +++ b/R/utils_input.R @@ -99,7 +99,7 @@ prepare_w <- function(w, X) { #' or column names in `X`. #' @param X Matrix-like. #' -#' @returns A list with "y" (vector, matrix) and "y_names" (if `y` +#' @returns A list with "y" (vector, matrix, or factor) and "y_names" (if `y` #' was passed as column names). prepare_y <- function(y, X) { if (NROW(y) < nrow(X) && all(y %in% colnames(X))) {