sfirke · billdenney · May 22, 2024 · May 22, 2024 · Dec 18, 2024 · Dec 18, 2024
diff --git a/.gitignore b/.gitignore
@@ -2,15 +2,9 @@
 .Rhistory
 .RData
 *.docx
-
-
 ~WRL0005\.tmp
 doc
 docs
 Meta
 docs/
 janitor.Rproj
-
-
-revdep/*
-revdep
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -44,6 +44,7 @@ Suggests:
     rmarkdown,
     RSQLite,
     sf,
+    spelling,
     testthat (>= 3.0.0),
     tibble,
     tidygraph
@@ -54,3 +55,4 @@ Encoding: UTF-8
 LazyData: true
 Roxygen: list(markdown = TRUE)
 RoxygenNote: 7.3.2
+Language: en-US
diff --git a/NAMESPACE b/NAMESPACE
@@ -29,6 +29,7 @@ export(adorn_rounding)
 export(adorn_title)
 export(adorn_totals)
 export(as_tabyl)
+export(assert_count)
 export(chisq.test)
 export(clean_names)
 export(compare_df_cols)

diff --git a/NEWS.md b/NEWS.md
@@ -16,8 +16,7 @@ These are all minor breaking changes resulting from enhancements and are not exp
 
 * The new function `excel_time_to_numeric()` converts times from Excel that do not have accompanying dates into a number of seconds.  (#245, thanks to **@billdenney** for the feature.)
 
-* A new argument `set_labels` to `clean_names()` stores the old names as labels in each column. Variable labels are visualized in Rstudio's data viewer or used by default by some packages such as `gt` instead of variable names. Labels can also be used in ggplot labels thanks to the function `easy_labs()` in the `ggeasy` package. Read this wonderful [post](https://www.pipinghotdata.com/posts/2022-09-13-the-case-for-variable-labels-in-r/) for more info about column labels. (#563, thanks to **@jospueyo** for the feature).
-
+* The new function `assert_count()` verifies that an expected number of values are `TRUE` for quality checks in data pipelines
 ## Bug fixes
 
 * `adorn_totals("row")` now succeeds if the new `name` of the totals row is already a factor level of the input data.frame (#529, thanks @egozoglu for reporting).
@@ -113,7 +112,7 @@ These are all minor breaking changes resulting from enhancements and are not exp
 
 ## New features
 
-* The `adorn_totals()` function now accepts the special argument `fill = NA`, which will insert a class-appropriate `NA` value into each column that isn't being totaled.  This preserves the class of each column; previously they were all convered to character. (thanks **@hamstr147** for implementing in #404 and **@ymer** for reporting in #298).
+* The `adorn_totals()` function now accepts the special argument `fill = NA`, which will insert a class-appropriate `NA` value into each column that isn't being totaled.  This preserves the class of each column; previously they were all converted to character. (thanks **@hamstr147** for implementing in #404 and **@ymer** for reporting in #298).
 
 * `adorn_totals()` now takes the value of `"both"` for the `where` argument.  That is, `adorn_totals("both")` is a shorter version of `adorn_totals(c("col", "row"))`.  (#362, thanks to **@svgsstats** for implementing and **@sfd99** for suggesting).
 
@@ -137,7 +136,7 @@ These are all minor breaking changes resulting from enhancements and are not exp
 
 * A call to make a 3-way `tabyl()` now succeeds when the first variable is of class `ordered` (#386)
 
-* If a totals row and/or column is present on a tabyl as a result of `adorn_totals()`, the functions `chisq.test()` and `fisher.test()` drop the totals and print a warning before proceding with the calculations (#385).
+* If a totals row and/or column is present on a tabyl as a result of `adorn_totals()`, the functions `chisq.test()` and `fisher.test()` drop the totals and print a warning before proceeding with the calculations (#385).
 
 # janitor 2.0.1 (2020-04-12)
 
@@ -283,7 +282,7 @@ This builds on the original functionality of janitor, with similar-but-improved
 
 ### A fully-overhauled `tabyl`
 
-`tabyl()` is now a single function that can count combinations of one, two, or three variables, ala base R's `table()`.  The resulting `tabyl` data.frames can be manipulated and formatted using a family of `adorn_` functions.  See the [tabyls vignette](https://sfirke.github.io/janitor/articles/tabyls.html) for more.
+`tabyl()` is now a single function that can count combinations of one, two, or three variables, a la base R's `table()`.  The resulting `tabyl` data.frames can be manipulated and formatted using a family of `adorn_` functions.  See the [tabyls vignette](https://sfirke.github.io/janitor/articles/tabyls.html) for more.
 
 The now-redundant legacy functions `crosstab()` and `adorn_crosstab()` have been deprecated, but remain in the package for now.  Existing code that relies on the version of `tabyl` present in janitor versions <= 0.3.1 will break if the `sort` argument was used, as that argument no longer exists in `tabyl` (use `dplyr::arrange()` instead).
 
@@ -299,7 +298,7 @@ No further changes are planned to `clean_names()` and its results should be stab
 
 ## Major features
 
-- `clean_names()` transliterates accented letters, e.g., `çãüœ` becomes `cauoe` [(#120)](https://github.com/sfirke/janitor/issues/120).  Thanks to **@fernandovmacedo**.
+- `clean_names()` transliterates accented letters, e.g., `C'C#C<E` becomes `cauoe` [(#120)](https://github.com/sfirke/janitor/issues/120).  Thanks to **@fernandovmacedo**.
 
 - `clean_names()` offers multiple options for variable name styling.  In addition to `snake_case` output you can select `smallCamelCase`, `BigCamelCase`, `ALL_CAPS` and others. [(#131)](https://github.com/sfirke/janitor/issues/131).
   - Thanks to **@tazinho**, who wrote the [snakecase](https://github.com/Tazinho/snakecase/) package that janitor depends on to do this, as well as the patch to incorporate it into `clean_names()`.  And thanks to **@maelle** for proposing this feature.

diff --git a/R/assertions.R b/R/assertions.R
@@ -0,0 +1,40 @@
+#' Verify that a vector of values has the expected number of `TRUE` values
+#'
+#' @param x A logical vector without `NA` values
+#' @param n The expected number of `TRUE` values
+#' @returns `x` if `sum(x) == n` or an informative error message otherwise
+#' @examples
+#' data.frame(A = 1:5) %>%
+#'   dplyr::mutate(
+#'     big_values = assert_count(A > 2, n = 3)
+#'   )
+#'
+#' my_data <- data.frame(name = c("Bill", "Sam"), birthdate = c("2024-05-22", "2024-05-22"))
+#' my_data |>
+#'   dplyr::mutate(
+#'     birthdate =
+#'       dplyr::case_when(
+#'         assert_count(name == "Bill" & birthdate == "2024-05-22") ~ "2024-05-23",
+#'         TRUE ~ birthdate
+#'       )
+#'   )
+#' @export
+assert_count <- function(x, n = 1) {
+  stopifnot(is.logical(x))
+  if (any(is.na(x))) {
+    stop(deparse(substitute(x)), " has NA values")
+  }
+  if (sum(x) != n) {
+    stop_message <-
+      sprintf(
+        "`%s` expected %g `TRUE` %s but %g %s found.",
+        deparse(substitute(x)),
+        n,
+        ngettext(n, "value", "values"),
+        sum(x),
+        ngettext(sum(x), "was", "were")
+      )
+    stop(stop_message)
+  }
+  x
+}
diff --git a/R/clean_names.R b/R/clean_names.R
@@ -33,7 +33,7 @@
 #'   support using `clean_names()` on `sf` and `tbl_graph` (from
 #'   `tidygraph`) objects as well as on database connections through
 #'   `dbplyr`. For cleaning other named objects like named lists
-#'   and vectors, use `make_clean_names()`. When `set_labels` is set to `TRUE`, the old names, 
+#'   and vectors, use `make_clean_names()`. When `set_labels` is set to `TRUE`, the old names,
 #'   stored as column labels, can be restored using `sjlabelled::label_to_colnames()`.
 #'
 #' @export
@@ -83,14 +83,13 @@ clean_names.default <- function(dat, ..., set_labels = FALSE) {
   if (is.null(names(dat))) {
     dimnames(dat) <- lapply(dimnames(dat), make_clean_names, ...)
   } else {
-    if (set_labels){
+    if (set_labels) {
       old_names <- names(dat)
-      for (i in seq_along(old_names)){
+      for (i in seq_along(old_names)) {
         attr(dat[[i]], "label") <- old_names[[i]]
       }
     }
     names(dat) <- make_clean_names(names(dat), ...)
-
   }
   dat
 }
@@ -112,9 +111,9 @@ clean_names.sf <- function(dat, ..., set_labels = FALSE) {
   sf_cleaned <- make_clean_names(sf_names[cols_to_rename], ...)
   # rename original df
   names(dat)[cols_to_rename] <- sf_cleaned
-  
-  if(set_labels){
-    for (i in seq_along(sf_names[cols_to_rename])){
+
+  if (set_labels) {
+    for (i in seq_along(sf_names[cols_to_rename])) {
       attr(dat[[i]], "label") <- sf_names[[i]]
     }
   }
@@ -131,7 +130,7 @@ clean_names.tbl_graph <- function(dat, ...) {
       call. = FALSE
     )
   } # nocov end
-  
+
   dplyr::rename_all(dat, .funs = make_clean_names, ...)
 }
 

diff --git a/R/round_half_up.R b/R/round_half_up.R
@@ -1,4 +1,4 @@
-#' Round a numeric vector; halves will be rounded up, ala Microsoft Excel.
+#' Round a numeric vector; halves will be rounded up, a la Microsoft Excel.
 #'
 #' @description
 #' In base R `round()`, halves are rounded to even, e.g., 12.5 and

diff --git a/_pkgdown.yml b/_pkgdown.yml
@@ -5,11 +5,11 @@ template:
 
 reference:
 - title: Cleaning data
- 
+
 - subtitle: Cleaning variable names
   contents:
   - contains("clean_names")
-   
+
 - title: Exploring data
   desc: >
     tabyls are an enhanced version of tables. See `vignette("tabyls")`
@@ -19,7 +19,7 @@ reference:
   - starts_with("adorn")
   - contains("tabyl")
   - -contains('.test')
-  
+
 - subtitle: Change order
   contents:
   - row_to_names
@@ -30,6 +30,7 @@ reference:
     Compare data frames columns
   contents:
   - starts_with("compare_df_cols")
+  - assert_count
 
 - title: Removing unnecessary columns / rows
   contents:
@@ -38,9 +39,9 @@ reference:
   - get_one_to_one
   - top_levels
   - single_value
-    
+
 - title: Rounding / dates helpers
-  desc: > 
+  desc: >
     Help to mimic some behaviour from Excel or SAS.
     These should be used on vector.
   contents:

diff --git a/man/assert_count.Rd b/man/assert_count.Rd
diff --git a/tests/testthat/test-assertions.R b/tests/testthat/test-assertions.R
@@ -0,0 +1,36 @@
+test_that("assert_count", {
+  expect_equal(
+    assert_count(TRUE, 1),
+    TRUE
+  )
+  expect_equal(
+    assert_count(rep(TRUE, 3), 3),
+    rep(TRUE, 3)
+  )
+  my_vector <- c(rep(TRUE, 3), FALSE)
+  expect_equal(
+    assert_count(my_vector, 3),
+    my_vector
+  )
+  expect_error(
+    assert_count(NA),
+    regexp = "NA has NA values"
+  )
+  # more informative errors
+  my_vector <- c(NA, TRUE)
+  expect_error(
+    assert_count(my_vector),
+    regexp = "my_vector has NA values"
+  )
+  my_vector <- c(FALSE, TRUE)
+  expect_error(
+    assert_count(my_vector, n = 2),
+    regexp = "`my_vector` expected 2 `TRUE` values but 1 was found."
+  )
+  # Check grammar of error message
+  my_vector <- c(TRUE, TRUE)
+  expect_error(
+    assert_count(my_vector, n = 1),
+    regexp = "`my_vector` expected 1 `TRUE` value but 2 were found."
+  )
+})
diff --git a/tests/testthat/test-clean-names.R b/tests/testthat/test-clean-names.R
@@ -190,14 +190,14 @@ test_that("labels are created in default method (feature request #563)", {
   dat_df <- dplyr::tibble(`a a` = c(11, 22), `b b` = c(2, 3))
   dat_df_clean_labels <- clean_names(dat_df, set_labels = TRUE)
   dat_df_clean <- clean_names(dat_df)
-  
-  for (i in seq_along(names(dat_df))){
+
+  for (i in seq_along(names(dat_df))) {
     # check that old names are saved as labels when set_labels is TRUE
     expect_equal(attr(dat_df_clean_labels[[i]], "label"), names(dat_df)[[i]])
     # check that old names are not stored if set_labels is not TRUE
     expect_null(attr(dat_df_clean[[i]], "label"))
-    }
-  
+  }
+
   # expect names are always cleaned
   expect_equal(names(dat_df_clean), c("a_a", "b_b"))
   expect_equal(names(dat_df_clean_labels), c("a_a", "b_b"))
@@ -605,19 +605,19 @@ test_that("Tests for cases beyond default snake for sf objects", {
 
 test_that("labels are created in sf method (feature request #563)", {
   skip_if_not_installed("sf")
-  
+
   dat_df <- dplyr::tibble(`a a` = c(11, 22), `b b` = c(2, 3))
   dat_sf <- dat_df
-  dat_sf$x <- c(1,2)
-  dat_sf$y <- c(1,2) 
+  dat_sf$x <- c(1, 2)
+  dat_sf$y <- c(1, 2)
   dat_sf <- sf::st_as_sf(dat_sf, coords = c("x", "y"))
   dat_sf_clean_labels <- clean_names(dat_sf, set_labels = TRUE)
   dat_sf_clean <- clean_names(dat_sf)
-  
-  for (i in seq_along(names(dat_df))){
+
+  for (i in seq_along(names(dat_df))) {
     # check that old names are saved as labels when set_labels is TRUE
     expect_equal(attr(dat_sf_clean_labels[[i]], "label"), names(dat_sf)[[i]])
-    
+
     # check that old names are not stored if set_labels is not TRUE
     expect_null(attr(dat_sf_clean[[i]], "label"))
   }

diff --git a/vignettes/.gitignore b/vignettes/.gitignore
@@ -0,0 +1,2 @@
+*.html
+*.R