Skip to content

Commit

Permalink
Merge pull request #251 from tidymodels/RC-1.0.5
Browse files Browse the repository at this point in the history
  • Loading branch information
EmilHvitfeldt authored Oct 23, 2023
2 parents a121d6f + 59e6775 commit 30e607a
Show file tree
Hide file tree
Showing 13 changed files with 113 additions and 56 deletions.
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: textrecipes
Title: Extra 'Recipes' for Text Processing
Version: 1.0.4.9000
Version: 1.0.5.9000
Authors@R: c(
person("Emil", "Hvitfeldt", , "[email protected]", role = c("aut", "cre"),
comment = c(ORCID = "0000-0002-0679-1945")),
Expand Down Expand Up @@ -32,6 +32,7 @@ Imports:
glue
Suggests:
covr,
data.table,
dials (>= 1.2.0),
hardhat,
janitor,
Expand Down
3 changes: 2 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
# textrecipes (development version)

* `step_untokenize()` and `step_normalization()` now returns factors instead of strings.
# textrecipes 1.0.5

* `step_untokenize()` and `step_text_normalization()` now return factors instead of strings. (#247)
# textrecipes 1.0.4

## Improvements
Expand Down
17 changes: 14 additions & 3 deletions R/dummy_hash.R
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,20 @@
#' @seealso [recipes::step_dummy()]
#' @family Steps for Numeric Variables From Characters
#'
#' @examplesIf rlang::is_installed("text2vec")
#' \dontrun{
#' @examplesIf all(c("text2vec", "data.table") %in% rownames(installed.packages()))
#' \dontshow{library(data.table)}
#' \dontshow{data.table::setDTthreads(2)}
#' \dontshow{Sys.setenv("OMP_NUM_THREADS" = 1)}
#' \dontshow{Sys.setenv("OMP_THREAD_LIMIT" = 1)}
#' \dontshow{Sys.setenv("rsparse_omp_threads" = 1L)}
#' \dontshow{options(rsparse_omp_threads = 1L)}
#' \dontshow{library(text2vec)}
#' \dontshow{Sys.setenv("OMP_NUM_THREADS" = 1)}
#' \dontshow{Sys.setenv("OMP_THREAD_LIMIT" = 1)}
#' \dontshow{Sys.setenv("rsparse_omp_threads" = 1L)}
#' \dontshow{options(rsparse_omp_threads = 1L)}
#' \dontshow{options("text2vec.mc.cores" = 1)}
#'
#' library(recipes)
#' library(modeldata)
#' data(grants)
Expand All @@ -82,7 +94,6 @@
#'
#' tidy(grants_rec, number = 1)
#' tidy(grants_obj, number = 1)
#' }
#' @export
step_dummy_hash <-
function(recipe,
Expand Down
5 changes: 4 additions & 1 deletion R/lda.R
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,10 @@
#'
#' @family Steps for Numeric Variables From Tokens
#'
#' @examplesIf rlang::is_installed("text2vec")
#' @examplesIf all(c("text2vec", "data.table") %in% rownames(installed.packages()))
#' \dontshow{library(data.table)}
#' \dontshow{data.table::setDTthreads(2)}
#' \dontshow{Sys.setenv("OMP_THREAD_LIMIT" = 2)}
#' library(recipes)
#' library(modeldata)
#' data(tate_text)
Expand Down
5 changes: 4 additions & 1 deletion R/texthash.R
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,10 @@
#' [step_text_normalization()] to perform text normalization.
#' @family Steps for Numeric Variables From Tokens
#'
#' @examplesIf rlang::is_installed("text2vec")
#' @examplesIf all(c("text2vec", "data.table") %in% rownames(installed.packages()))
#' \dontshow{library(data.table)}
#' \dontshow{data.table::setDTthreads(2)}
#' \dontshow{Sys.setenv("OMP_THREAD_LIMIT" = 2)}
#' library(recipes)
#' library(modeldata)
#' data(tate_text)
Expand Down
2 changes: 1 addition & 1 deletion cran-comments.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
## Release Summary

This is the 18th CRAN release of textrecipes.
This is the 19th CRAN release of textrecipes.

## R CMD check results

Expand Down
17 changes: 14 additions & 3 deletions man/step_dummy_hash.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 4 additions & 1 deletion man/step_lda.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 4 additions & 1 deletion man/step_texthash.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

41 changes: 20 additions & 21 deletions revdep/README.md
Original file line number Diff line number Diff line change
@@ -1,33 +1,32 @@
# Platform

|field |value |
|:--------|:------------------------------------------------------------------------------------------|
|version |R version 4.3.0 (2023-04-21) |
|os |macOS Ventura 13.5 |
|system |aarch64, darwin20 |
|ui |RStudio |
|language |(EN) |
|collate |en_US.UTF-8 |
|ctype |en_US.UTF-8 |
|tz |America/Los_Angeles |
|date |2023-08-14 |
|rstudio |2023.09.0-daily+310 Desert Sunflower (desktop) |
|pandoc |3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown) |
|field |value |
|:--------|:--------------------------------------------------|
|version |R version 4.3.1 (2023-06-16) |
|os |macOS Ventura 13.6 |
|system |aarch64, darwin20 |
|ui |X11 |
|language |(EN) |
|collate |en_US.UTF-8 |
|ctype |en_US.UTF-8 |
|tz |America/Los_Angeles |
|date |2023-10-17 |
|pandoc |3.1.3 @ /Users/emilhvitfeldt/miniforge3/bin/pandoc |

# Dependencies

|package |old |new |Δ |
|:------------|:----------|:----------|:--|
|textrecipes |1.0.3 |1.0.3.9000 |* |
|textrecipes |1.0.4 |1.0.4.9000 |* |
|cli |3.6.1 |3.6.1 | |
|clock |0.7.0 |0.7.0 | |
|cpp11 |0.4.6 |0.4.6 | |
|data.table |1.14.8 |1.14.8 | |
|diagram |1.6.5 |1.6.5 | |
|digest |0.6.33 |0.6.33 | |
|dplyr |1.1.2 |1.1.2 | |
|dplyr |1.1.3 |1.1.3 | |
|ellipsis |0.3.2 |0.3.2 | |
|fansi |1.0.4 |1.0.4 | |
|fansi |1.0.5 |1.0.5 | |
|future |1.33.0 |1.33.0 | |
|future.apply |1.11.0 |1.11.0 | |
|generics |0.1.3 |0.1.3 | |
Expand All @@ -39,18 +38,18 @@
|lava |1.7.2.1 |1.7.2.1 | |
|lifecycle |1.0.3 |1.0.3 | |
|listenv |0.9.0 |0.9.0 | |
|lubridate |1.9.2 |1.9.2 | |
|lubridate |1.9.3 |1.9.3 | |
|magrittr |2.0.3 |2.0.3 | |
|numDeriv |2016.8-1.1 |2016.8-1.1 | |
|parallelly |1.36.0 |1.36.0 | |
|pillar |1.9.0 |1.9.0 | |
|pkgconfig |2.0.3 |2.0.3 | |
|prodlim |2023.03.31 |2023.03.31 | |
|prodlim |2023.08.28 |2023.08.28 | |
|progressr |0.14.0 |0.14.0 | |
|purrr |1.0.2 |1.0.2 | |
|R6 |2.5.1 |2.5.1 | |
|Rcpp |1.0.11 |1.0.11 | |
|recipes |1.0.7 |1.0.7 | |
|recipes |1.0.8 |1.0.8 | |
|rlang |1.1.1 |1.1.1 | |
|shape |1.4.6 |1.4.6 | |
|SnowballC |0.7.1 |0.7.1 | |
Expand All @@ -65,8 +64,8 @@
|tokenizers |0.3.0 |0.3.0 | |
|tzdb |0.4.0 |0.4.0 | |
|utf8 |1.2.3 |1.2.3 | |
|vctrs |0.6.3 |0.6.3 | |
|withr |2.5.0 |2.5.0 | |
|vctrs |0.6.4 |0.6.4 | |
|withr |2.5.1 |2.5.1 | |

# Revdeps

27 changes: 18 additions & 9 deletions tests/testthat/test-dummy_hash.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ rec <- recipe(~., data = test_data)

test_that("hashing gives double outputs", {
skip_if_not_installed("text2vec")
skip_on_cran() # because data.table uses all cores by default
skip_if_not_installed("data.table")
data.table::setDTthreads(2) # because data.table uses all cores by default

rec <- rec %>%
step_dummy_hash(sponsor_code)
Expand All @@ -30,7 +31,8 @@ test_that("hashing gives double outputs", {
})

test_that("hashing multiple factors", {
skip_on_cran() # because data.table uses all cores by default
skip_if_not_installed("data.table")
data.table::setDTthreads(2) # because data.table uses all cores by default

res <- rec %>%
step_dummy_hash(all_nominal_predictors(), num_terms = 12) %>%
Expand All @@ -43,7 +45,8 @@ test_that("hashing multiple factors", {
})

test_that("hashing collapsed multiple factors", {
skip_on_cran() # because data.table uses all cores by default
skip_if_not_installed("data.table")
data.table::setDTthreads(2) # because data.table uses all cores by default

res <- rec %>%
step_dummy_hash(all_nominal_predictors(), num_terms = 4, collapse = TRUE) %>%
Expand All @@ -56,7 +59,8 @@ test_that("hashing collapsed multiple factors", {

test_that("hashing output width changes accordingly with num_terms", {
skip_if_not_installed("text2vec")
skip_on_cran() # because data.table uses all cores by default
skip_if_not_installed("data.table")
data.table::setDTthreads(2) # because data.table uses all cores by default

rec <- rec %>%
step_dummy_hash(sponsor_code, num_terms = 256) %>%
Expand All @@ -72,7 +76,8 @@ test_that("hashing output width changes accordingly with num_terms", {

test_that("hashing output width changes accordingly with num_terms", {
skip_if_not_installed("text2vec")
skip_on_cran() # because data.table uses all cores by default
skip_if_not_installed("data.table")
data.table::setDTthreads(2) # because data.table uses all cores by default

signed <- recipe(~., data = test_data) %>%
step_dummy_hash(all_predictors(), num_terms = 2) %>%
Expand All @@ -92,7 +97,8 @@ test_that("hashing output width changes accordingly with num_terms", {

test_that("check_name() is used", {
skip_if_not_installed("text2vec")
skip_on_cran() # because data.table uses all cores by default
skip_if_not_installed("data.table")
data.table::setDTthreads(2) # because data.table uses all cores by default

dat <- test_data
dat$text <- dat$sponsor_code
Expand Down Expand Up @@ -183,7 +189,8 @@ test_that("empty selection tidy method works", {

test_that("keep_original_cols works", {
skip_if_not_installed("text2vec")
skip_on_cran() # because data.table uses all cores by default
skip_if_not_installed("data.table")
data.table::setDTthreads(2) # because data.table uses all cores by default

new_names <- paste0("dummyhash_sponsor_code_", 1:5)

Expand Down Expand Up @@ -212,7 +219,8 @@ test_that("keep_original_cols works", {

test_that("keep_original_cols - can prep recipes with it missing", {
skip_if_not_installed("text2vec")
skip_on_cran() # because data.table uses all cores by default
skip_if_not_installed("data.table")
data.table::setDTthreads(2) # because data.table uses all cores by default

rec <- recipe(~ sponsor_code, data = test_data) %>%
step_dummy_hash(sponsor_code)
Expand All @@ -232,7 +240,8 @@ test_that("keep_original_cols - can prep recipes with it missing", {

test_that("printing", {
skip_if_not_installed("text2vec")
skip_on_cran() # because data.table uses all cores by default
skip_if_not_installed("data.table")
data.table::setDTthreads(2) # because data.table uses all cores by default

rec <- rec %>%
step_dummy_hash(sponsor_code)
Expand Down
21 changes: 14 additions & 7 deletions tests/testthat/test-lda.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ rec <- recipe(~ medium + artist, data = tate_text[seq_len(n_rows), ])

test_that("step_lda works as intended", {
skip_if_not_installed("text2vec")
skip_on_cran() # because data.table uses all cores by default
skip_if_not_installed("data.table")
data.table::setDTthreads(2) # because data.table uses all cores by default

n_top <- 10
rec1 <- rec %>%
Expand All @@ -27,7 +28,8 @@ test_that("step_lda works as intended", {

test_that("step_lda works with num_topics argument", {
skip_if_not_installed("text2vec")
skip_on_cran() # because data.table uses all cores by default
skip_if_not_installed("data.table")
data.table::setDTthreads(2) # because data.table uses all cores by default

n_top <- 100
rec1 <- rec %>%
Expand All @@ -42,7 +44,8 @@ test_that("step_lda works with num_topics argument", {

test_that("check_name() is used", {
skip_if_not_installed("text2vec")
skip_on_cran() # because data.table uses all cores by default
skip_if_not_installed("data.table")
data.table::setDTthreads(2) # because data.table uses all cores by default

dat <- tate_text[seq_len(100), ]
dat$text <- dat$medium
Expand All @@ -62,7 +65,8 @@ test_that("check_name() is used", {

test_that("bake method errors when needed non-standard role columns are missing", {
skip_if_not_installed("text2vec")
skip_on_cran() # because data.table uses all cores by default
skip_if_not_installed("data.table")
data.table::setDTthreads(2) # because data.table uses all cores by default

tokenized_test_data <- rec %>%
step_tokenize(medium) %>%
Expand Down Expand Up @@ -126,7 +130,8 @@ test_that("empty selection tidy method works", {

test_that("keep_original_cols works", {
skip_if_not_installed("text2vec")
skip_on_cran() # because data.table uses all cores by default
skip_if_not_installed("data.table")
data.table::setDTthreads(2) # because data.table uses all cores by default

new_names <- paste0("lda_medium_", 1:10)

Expand Down Expand Up @@ -157,7 +162,8 @@ test_that("keep_original_cols works", {

test_that("keep_original_cols - can prep recipes with it missing", {
skip_if_not_installed("text2vec")
skip_on_cran() # because data.table uses all cores by default
skip_if_not_installed("data.table")
data.table::setDTthreads(2) # because data.table uses all cores by default

rec <- recipe(~ medium, data = tate_text[seq_len(n_rows), ]) %>%
step_tokenize(medium) %>%
Expand All @@ -178,7 +184,8 @@ test_that("keep_original_cols - can prep recipes with it missing", {

test_that("printing", {
skip_if_not_installed("text2vec")
skip_on_cran() # because data.table uses all cores by default
skip_if_not_installed("data.table")
data.table::setDTthreads(2) # because data.table uses all cores by default

rec <- rec %>%
step_tokenize(medium) %>%
Expand Down
Loading

0 comments on commit 30e607a

Please sign in to comment.