From 81e372a2194419b7ca8b701277ad2e6eb3fd99c5 Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Tue, 19 Mar 2024 13:46:07 -0700 Subject: [PATCH 1/8] dont import from keras --- NAMESPACE | 11 ----------- R/embed-package.R | 11 ----------- R/embed.R | 24 ++++++++++++------------ 3 files changed, 12 insertions(+), 34 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 6c09f61..424966b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -113,17 +113,6 @@ importFrom(generics,required_pkgs) importFrom(generics,tidy) importFrom(generics,tunable) importFrom(glue,glue) -importFrom(keras,backend) -importFrom(keras,compile) -importFrom(keras,fit) -importFrom(keras,get_layer) -importFrom(keras,keras_model) -importFrom(keras,keras_model_sequential) -importFrom(keras,layer_concatenate) -importFrom(keras,layer_dense) -importFrom(keras,layer_embedding) -importFrom(keras,layer_flatten) -importFrom(keras,layer_input) importFrom(lifecycle,deprecated) importFrom(recipes,remove_original_cols) importFrom(stats,as.formula) diff --git a/R/embed-package.R b/R/embed-package.R index d611099..3d6bfdb 100644 --- a/R/embed-package.R +++ b/R/embed-package.R @@ -14,17 +14,6 @@ #' @importFrom dplyr mutate #' @importFrom dplyr one_of #' @importFrom glue glue -#' @importFrom keras backend -#' @importFrom keras compile -#' @importFrom keras fit -#' @importFrom keras get_layer -#' @importFrom keras keras_model -#' @importFrom keras keras_model_sequential -#' @importFrom keras layer_concatenate -#' @importFrom keras layer_dense -#' @importFrom keras layer_embedding -#' @importFrom keras layer_flatten -#' @importFrom keras layer_input #' @importFrom lifecycle deprecated #' @importFrom recipes remove_original_cols #' @importFrom stats as.formula diff --git a/R/embed.R b/R/embed.R index 067fccb..b4e697f 100644 --- a/R/embed.R +++ b/R/embed.R @@ -306,7 +306,7 @@ tf_coefs2 <- function(x, y, z, opt, num, lab, h, seeds = sample.int(10000, 4), inputs <- vector(mode = "list", length = p) # For each categorical 
predictor, make an input layer for (i in 1:p) { - inputs[[i]] <- layer_input(shape = 1, name = paste0("input_", vars[i])) + inputs[[i]] <- keras::layer_input(shape = 1, name = paste0("input_", vars[i])) } layers <- vector(mode = "list", length = p) @@ -314,32 +314,32 @@ tf_coefs2 <- function(x, y, z, opt, num, lab, h, seeds = sample.int(10000, 4), for (i in 1:p) { layers[[i]] <- inputs[[i]] %>% - layer_embedding( + keras::layer_embedding( input_dim = length(lvl[[i]]) + 1, output_dim = num, input_length = 1, name = paste0("layer_", vars[i]) ) %>% - layer_flatten() + keras::layer_flatten() } if (is.null(z)) { if (p > 1) { - all_layers <- layer_concatenate(layers) + all_layers <- keras::layer_concatenate(layers) } else { all_layers <- layers[[1]] } } else { mats$z <- as.matrix(z) - pred_layer <- layer_input(shape = ncol(z), name = "other_pred") - all_layers <- layer_concatenate(c(layers, pred_layer)) + pred_layer <- keras::layer_input(shape = ncol(z), name = "other_pred") + all_layers <- keras::layer_concatenate(c(layers, pred_layer)) inputs <- c(inputs, pred_layer) } if (h > 0) { all_layers <- all_layers %>% - layer_dense( + keras::layer_dense( units = h, activation = "relu", name = "hidden_layer", kernel_initializer = keras::initializer_glorot_uniform(seed = seeds[3]) ) @@ -348,14 +348,14 @@ tf_coefs2 <- function(x, y, z, opt, num, lab, h, seeds = sample.int(10000, 4), if (factor_y) { all_layers <- all_layers %>% - layer_dense( + keras::layer_dense( units = ncol(y), activation = "softmax", name = "output_layer", kernel_initializer = keras::initializer_glorot_uniform(seed = seeds[4]) ) } else { all_layers <- all_layers %>% - layer_dense( + keras::layer_dense( units = 1, activation = "linear", name = "output_layer", kernel_initializer = keras::initializer_glorot_uniform(seed = seeds[4]) ) @@ -365,7 +365,7 @@ tf_coefs2 <- function(x, y, z, opt, num, lab, h, seeds = sample.int(10000, 4), keras::keras_model(inputs = inputs, outputs = all_layers) model %>% - compile( + 
keras::compile( loss = opt$loss, metrics = opt$metrics, optimizer = opt$optimizer @@ -373,7 +373,7 @@ tf_coefs2 <- function(x, y, z, opt, num, lab, h, seeds = sample.int(10000, 4), history <- model %>% - fit( + keras::fit( x = unname(mats), y = y, epochs = opt$epochs, @@ -387,7 +387,7 @@ tf_coefs2 <- function(x, y, z, opt, num, lab, h, seeds = sample.int(10000, 4), for (i in 1:p) { layer_values[[i]] <- - get_layer(model, paste0("layer_", vars[i]))$get_weights() %>% + keras::get_layer(model, paste0("layer_", vars[i]))$get_weights() %>% as.data.frame() %>% setNames(names0(num, paste0(vars[i], "_embed_"))) %>% as_tibble() %>% From 8a4d5940efccb4625812041f1927592eb9a447e2 Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Tue, 19 Mar 2024 13:52:30 -0700 Subject: [PATCH 2/8] check keras is installed for step_embed() --- R/embed.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/R/embed.R b/R/embed.R index b4e697f..2666551 100644 --- a/R/embed.R +++ b/R/embed.R @@ -273,6 +273,8 @@ is_tf_2 <- function() { tf_coefs2 <- function(x, y, z, opt, num, lab, h, seeds = sample.int(10000, 4), ...) 
{ + rlang::check_installed("keras") + vars <- names(x) p <- length(vars) From de7f6802cf9a024a044c8f1e7b22adb5b89594a5 Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Tue, 19 Mar 2024 13:52:41 -0700 Subject: [PATCH 3/8] move keras and tensorflow to suggests --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index e750c61..67eb31d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -23,13 +23,11 @@ Imports: glue, dplyr (>= 1.1.0), generics (>= 0.1.0), - keras, lifecycle, purrr, rlang (>= 0.4.10), rsample, stats, - tensorflow, tibble, tidyr, utils, @@ -42,6 +40,7 @@ Suggests: ggplot2, hardhat, irlba, + keras, knitr, lme4, modeldata, @@ -49,6 +48,7 @@ Suggests: rpart, rstanarm, stringdist, + tensorflow, testthat (>= 3.0.0), VBsparsePCA, xgboost From daa0968574e218892b3124a04b646fdcb32b2418 Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Tue, 19 Mar 2024 13:54:00 -0700 Subject: [PATCH 4/8] check keras installed in feature_hash --- R/feature_hash.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/R/feature_hash.R b/R/feature_hash.R index 472a88b..2dddeac 100644 --- a/R/feature_hash.R +++ b/R/feature_hash.R @@ -164,6 +164,8 @@ make_hash_vars <- function(x, prefix, num_hash = 2^8) { uni_x <- unique(x) + rlang::check_installed("keras") + column_int <- purrr::map_int( uni_x, From 3075401c4b4e144bab8211945b334c3b1107e423 Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Tue, 19 Mar 2024 14:08:30 -0700 Subject: [PATCH 5/8] don't link to keras directly --- R/embed.R | 6 +++--- R/feature_hash.R | 2 +- man/step_embed.Rd | 6 +++--- man/step_feature_hash.Rd | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/R/embed.R b/R/embed.R index 2666551..495d366 100644 --- a/R/embed.R +++ b/R/embed.R @@ -128,7 +128,7 @@ #' "Concatenate Embeddings for Categorical Variables with Keras" #' \url{https://flovv.github.io/Embeddings_with_keras_part2/} #' -#' @examplesIf !embed:::is_cran_check() && 
rlang::is_installed("modeldata") +#' @examplesIf !embed:::is_cran_check() && rlang::is_installed(c("modeldata", "keras")) #' data(grants, package = "modeldata") #' #' set.seed(1) @@ -479,9 +479,9 @@ print.step_embed <- #' @export #' @rdname step_embed -#' @param optimizer,loss,metrics Arguments to pass to [keras::compile()] +#' @param optimizer,loss,metrics Arguments to pass to keras::compile() #' @param epochs,validation_split,batch_size,verbose,callbacks Arguments to pass -#' to [keras::fit()] +#' to keras::fit() embed_control <- function(loss = "mse", metrics = NULL, optimizer = "sgd", diff --git a/R/feature_hash.R b/R/feature_hash.R index 2dddeac..e67790a 100644 --- a/R/feature_hash.R +++ b/R/feature_hash.R @@ -55,7 +55,7 @@ #' Approach for Predictive Models_. CRC/Chapman Hall #' \url{https://bookdown.org/max/FES/encoding-predictors-with-many-categories.html} #' @seealso [recipes::step_dummy()], [recipes::step_zv()] -#' @examplesIf !embed:::is_cran_check() && rlang::is_installed("modeldata") +#' @examplesIf !embed:::is_cran_check() && rlang::is_installed(c("modeldata", "keras")) #' data(grants, package = "modeldata") #' rec <- #' recipe(class ~ sponsor_code, data = grants_other) %>% diff --git a/man/step_embed.Rd b/man/step_embed.Rd index debb58a..e9a3a26 100644 --- a/man/step_embed.Rd +++ b/man/step_embed.Rd @@ -81,10 +81,10 @@ processing the outcome variable(s)). Care should be taken when using \code{skip \item{id}{A character string that is unique to this step to identify it.} -\item{optimizer, loss, metrics}{Arguments to pass to \code{\link[keras:reexports]{keras::compile()}}} +\item{optimizer, loss, metrics}{Arguments to pass to keras::compile()} \item{epochs, validation_split, batch_size, verbose, callbacks}{Arguments to pass -to \code{\link[keras:reexports]{keras::fit()}}} +to keras::fit()} } \value{ An updated version of \code{recipe} with the new step added to the @@ -179,7 +179,7 @@ The underlying operation does not allow for case weights. 
} \examples{ -\dontshow{if (!embed:::is_cran_check() && rlang::is_installed("modeldata")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (!embed:::is_cran_check() && rlang::is_installed(c("modeldata", "keras"))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} data(grants, package = "modeldata") set.seed(1) diff --git a/man/step_feature_hash.Rd b/man/step_feature_hash.Rd index 659c07a..cd30694 100644 --- a/man/step_feature_hash.Rd +++ b/man/step_feature_hash.Rd @@ -99,7 +99,7 @@ The underlying operation does not allow for case weights. } \examples{ -\dontshow{if (!embed:::is_cran_check() && rlang::is_installed("modeldata")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (!embed:::is_cran_check() && rlang::is_installed(c("modeldata", "keras"))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} data(grants, package = "modeldata") rec <- recipe(class ~ sponsor_code, data = grants_other) \%>\% From c24a7a2da73a70bbdfeaf5767f862a750f414fc2 Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Tue, 19 Mar 2024 14:08:44 -0700 Subject: [PATCH 6/8] add skip_if_not_installed to step_embed --- tests/testthat/test-embed.R | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tests/testthat/test-embed.R b/tests/testthat/test-embed.R index 2ce0ab2..369c085 100644 --- a/tests/testthat/test-embed.R +++ b/tests/testthat/test-embed.R @@ -6,6 +6,7 @@ withr::local_envvar(TF_CPP_MIN_LOG_LEVEL = "2") test_that("factor encoded predictor", { skip_on_cran() + skip_if_not_installed("keras") skip_if(!embed:::is_tf_available()) class_test <- recipe(x2 ~ ., data = ex_dat) %>% @@ -76,6 +77,7 @@ test_that("factor encoded predictor", { test_that("character encoded predictor", { skip_on_cran() + skip_if_not_installed("keras") skip_if(!embed:::is_tf_available()) class_test <- recipe(x2 ~ ., data = ex_dat_ch) %>% @@ -144,6 +146,7 @@ test_that("character encoded 
predictor", { test_that("factor encoded predictor", { skip_on_cran() + skip_if_not_installed("keras") skip_if(!embed:::is_tf_available()) class_test <- recipe(x1 ~ ., data = ex_dat) %>% @@ -213,6 +216,7 @@ test_that("factor encoded predictor", { test_that("character encoded predictor", { skip_on_cran() + skip_if_not_installed("keras") skip_if(!embed:::is_tf_available()) class_test <- recipe(x1 ~ ., data = ex_dat_ch) %>% @@ -281,6 +285,7 @@ test_that("character encoded predictor", { test_that("bad args", { skip_on_cran() + skip_if_not_installed("keras") skip_if(!embed:::is_tf_available()) three_class <- iris @@ -297,6 +302,7 @@ test_that("bad args", { test_that("check_name() is used", { skip_on_cran() + skip_if_not_installed("keras") skip_if(!embed:::is_tf_available()) dat <- ex_dat @@ -330,6 +336,7 @@ test_that("tunable", { test_that("bake method errors when needed non-standard role columns are missing", { skip_on_cran() + skip_if_not_installed("keras") skip_if(!embed:::is_tf_available()) rec <- recipe(x2 ~ ., data = ex_dat) %>% step_embed( @@ -351,6 +358,7 @@ test_that("bake method errors when needed non-standard role columns are missing" test_that("empty printing", { skip_on_cran() + skip_if_not_installed("keras") skip_if(!embed:::is_tf_available()) rec <- recipe(mpg ~ ., mtcars) @@ -396,6 +404,7 @@ test_that("empty selection tidy method works", { test_that("keep_original_cols works", { skip_on_cran() + skip_if_not_installed("keras") skip_if(!embed:::is_tf_available()) new_names <- c("x2", "x3_embed_1", "x3_embed_2") @@ -427,6 +436,7 @@ test_that("keep_original_cols works", { test_that("keep_original_cols - can prep recipes with it missing", { skip_on_cran() + skip_if_not_installed("keras") skip_if(!embed:::is_tf_available()) rec <- recipe(x2 ~ x3, data = ex_dat) %>% @@ -446,6 +456,7 @@ test_that("keep_original_cols - can prep recipes with it missing", { test_that("printing", { skip_on_cran() + skip_if_not_installed("keras") skip_if(!embed:::is_tf_available()) 
rec <- recipe(x2 ~ ., data = ex_dat_ch) %>% From cac5d08ad52f79050afbe2d27d3c2caae90868d6 Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Tue, 19 Mar 2024 14:10:10 -0700 Subject: [PATCH 7/8] add news --- NEWS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NEWS.md b/NEWS.md index 38d424c..e4b4170 100644 --- a/NEWS.md +++ b/NEWS.md @@ -6,6 +6,8 @@ * Documentation for tidy methods for all steps has been improved to describe the return value more accurately. (#217) +* {keras} and {tensorflow} have been moved to Suggests instead of Imports. (#218) + # embed 1.1.3 * `step_collapse_stringdist()` will now return predictors as factors. (#204) From 8c33289f107ba51888cf98194be05665b80879c3 Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Tue, 19 Mar 2024 14:25:27 -0700 Subject: [PATCH 8/8] don't install tensorflow in check-hard --- .github/workflows/R-CMD-check-hard.yaml | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/.github/workflows/R-CMD-check-hard.yaml b/.github/workflows/R-CMD-check-hard.yaml index 4a6a9e9..64f517e 100644 --- a/.github/workflows/R-CMD-check-hard.yaml +++ b/.github/workflows/R-CMD-check-hard.yaml @@ -52,17 +52,6 @@ jobs: any::rmarkdown needs: check pak-version: devel - - - uses: actions/setup-python@v4 - with: - python-version: 3.9 - - - name: Install TensorFlow - run: | - reticulate::virtualenv_create('r-reticulate', python='3.9') - reticulate::use_virtualenv('r-reticulate') - tensorflow::install_tensorflow(version='2.11.0') - shell: Rscript {0} - uses: r-lib/actions/check-r-package@v2 with: