From 7a8e651d65ff781679ec9a75d9a2c9c478864ac0 Mon Sep 17 00:00:00 2001
From: Bill Denney <wdenney@humanpredictions.com>
Date: Wed, 22 May 2024 10:00:55 -0400
Subject: [PATCH 1/6] Add `assert_count_true()` to verify that an expected
 number of values are `TRUE`

---
 NAMESPACE                               |  1 +
 NEWS.md                                 |  2 ++
 R/assert_count_true.R                   | 30 +++++++++++++++++++++
 man/assert_count_true.Rd                | 25 +++++++++++++++++
 tests/testthat/test-assert_count_true.R | 36 +++++++++++++++++++++++++
 5 files changed, 94 insertions(+)
 create mode 100644 R/assert_count_true.R
 create mode 100644 man/assert_count_true.Rd
 create mode 100644 tests/testthat/test-assert_count_true.R

diff --git a/NAMESPACE b/NAMESPACE
index 27073345..37444fca 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -29,6 +29,7 @@ export(adorn_rounding)
 export(adorn_title)
 export(adorn_totals)
 export(as_tabyl)
+export(assert_count_true)
 export(chisq.test)
 export(clean_names)
 export(compare_df_cols)
diff --git a/NEWS.md b/NEWS.md
index 5d715ce2..0526177f 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -14,6 +14,8 @@ These are all minor breaking changes resulting from enhancements and are not exp
 
 * The new function `excel_time_to_numeric()` converts times from Excel that do not have accompanying dates into a number of seconds.  (#245, thanks to **@billdenney** for the feature.)
 
+* The new function `assert_count_true()` verifies that an expected number of values are `TRUE` for quality checks in data pipelines
+
 ## Bug fixes
 
 * `adorn_totals("row")` now succeeds if the new `name` of the totals row is already a factor level of the input data.frame (#529, thanks @egozoglu for reporting).
diff --git a/R/assert_count_true.R b/R/assert_count_true.R
new file mode 100644
index 00000000..bd212568
--- /dev/null
+++ b/R/assert_count_true.R
@@ -0,0 +1,30 @@
+#' Verify that a vector of values has the expected number of `TRUE` values
+#'
+#' @param x A logical vecotor without `NA` values
+#' @param n The expected number of `TRUE` values
+#' @returns `x` if `sum(x) == n` or an informative error message otherwise
+#' @examples
+#' data.frame(A = 1:5) %>%
+#'   dplyr::mutate(
+#'     big_values = assert_count_true(A > 2, n = 3)
+#'   )
+#' @export
+assert_count_true <- function(x, n = 1) {
+  stopifnot(is.logical(x))
+  if (any(is.na(x))) {
+    stop(deparse(substitute(x)), " has NA values")
+  }
+  if (sum(x) != n) {
+    stop_message <-
+      sprintf(
+        "`%s` expected %g `TRUE` %s but %g %s found.",
+        deparse(substitute(x)),
+        n,
+        ngettext(n, "value", "values"),
+        sum(x),
+        ngettext(sum(x), "was", "were")
+      )
+    stop(stop_message)
+  }
+  x
+}
diff --git a/man/assert_count_true.Rd b/man/assert_count_true.Rd
new file mode 100644
index 00000000..46db9697
--- /dev/null
+++ b/man/assert_count_true.Rd
@@ -0,0 +1,25 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/assert_count_true.R
+\name{assert_count_true}
+\alias{assert_count_true}
+\title{Verify that a vector of values has the expected number of \code{TRUE} values}
+\usage{
+assert_count_true(x, n = 1)
+}
+\arguments{
+\item{x}{A logical vecotor without \code{NA} values}
+
+\item{n}{The expected number of \code{TRUE} values}
+}
+\value{
+\code{x} if \code{sum(x) == n} or an informative error message otherwise
+}
+\description{
+Verify that a vector of values has the expected number of \code{TRUE} values
+}
+\examples{
+data.frame(A = 1:5) \%>\%
+  dplyr::mutate(
+    big_values = assert_count_true(A > 2, n = 3)
+  )
+}
diff --git a/tests/testthat/test-assert_count_true.R b/tests/testthat/test-assert_count_true.R
new file mode 100644
index 00000000..7b477070
--- /dev/null
+++ b/tests/testthat/test-assert_count_true.R
@@ -0,0 +1,36 @@
+test_that("assert_count_true", {
+  expect_equal(
+    assert_count_true(TRUE, 1),
+    TRUE
+  )
+  expect_equal(
+    assert_count_true(rep(TRUE, 3), 3),
+    rep(TRUE, 3)
+  )
+  my_vector <- c(rep(TRUE, 3), FALSE)
+  expect_equal(
+    assert_count_true(my_vector, 3),
+    my_vector
+  )
+  expect_error(
+    assert_count_true(NA),
+    regexp = "NA has NA values"
+  )
+  # more informative errors
+  my_vector <- c(NA, TRUE)
+  expect_error(
+    assert_count_true(my_vector),
+    regexp = "my_vector has NA values"
+  )
+  my_vector <- c(FALSE, TRUE)
+  expect_error(
+    assert_count_true(my_vector, n = 2),
+    regexp = "`my_vector` expected 2 `TRUE` values but 1 was found."
+  )
+  # Check grammar of error message
+  my_vector <- c(TRUE, TRUE)
+  expect_error(
+    assert_count_true(my_vector, n = 1),
+    regexp = "`my_vector` expected 1 `TRUE` value but 2 were found."
+  )
+})

From 030a03db7801ec25058d5d7ee5e80bf079916ab7 Mon Sep 17 00:00:00 2001
From: Bill Denney <wdenney@humanpredictions.com>
Date: Wed, 22 May 2024 10:16:14 -0400
Subject: [PATCH 2/6] Fix pkgdown; add another example

---
 R/assert_count_true.R    | 10 ++++++++++
 _pkgdown.yml             | 11 ++++++-----
 man/assert_count_true.Rd | 10 ++++++++++
 3 files changed, 26 insertions(+), 5 deletions(-)

diff --git a/R/assert_count_true.R b/R/assert_count_true.R
index bd212568..37779315 100644
--- a/R/assert_count_true.R
+++ b/R/assert_count_true.R
@@ -8,6 +8,16 @@
 #'   dplyr::mutate(
 #'     big_values = assert_count_true(A > 2, n = 3)
 #'   )
+#'
+#' my_data <- data.frame(name = c("Bill", "Sam"), birthdate = c("2024-05-22", "2024-05-22"))
+#' my_data |>
+#'   dplyr::mutate(
+#'     birthdate =
+#'       dplyr::case_when(
+#'         assert_count_true(name == "Bill" & birthdate == "2024-05-22") ~ "2024-05-23",
+#'         TRUE ~ birthdate
+#'       )
+#'   )
 #' @export
 assert_count_true <- function(x, n = 1) {
   stopifnot(is.logical(x))
diff --git a/_pkgdown.yml b/_pkgdown.yml
index 965f4647..7d96cb04 100644
--- a/_pkgdown.yml
+++ b/_pkgdown.yml
@@ -5,11 +5,11 @@ template:
 
 reference:
 - title: Cleaning data
- 
+
 - subtitle: Cleaning variable names
   contents:
   - contains("clean_names")
-   
+
 - title: Exploring data
   desc: >
     tabyls are an enhanced version of tables. See `vignette("tabyls")`
@@ -19,7 +19,7 @@ reference:
   - starts_with("adorn")
   - contains("tabyl")
   - -contains('.test')
-  
+
 - subtitle: Change order
   contents:
   - row_to_names
@@ -30,6 +30,7 @@ reference:
     Compare data frames columns
   contents:
   - starts_with("compare_df_cols")
+  - assert_count_true
 
 - title: Removing unnecessary columns / rows
   contents:
@@ -38,9 +39,9 @@ reference:
   - get_one_to_one
   - top_levels
   - single_value
-    
+
 - title: Rounding / dates helpers
-  desc: > 
+  desc: >
     Help to mimic some behaviour from Excel or SAS.
     These should be used on vector.
   contents:
diff --git a/man/assert_count_true.Rd b/man/assert_count_true.Rd
index 46db9697..c158760e 100644
--- a/man/assert_count_true.Rd
+++ b/man/assert_count_true.Rd
@@ -22,4 +22,14 @@ data.frame(A = 1:5) \%>\%
   dplyr::mutate(
     big_values = assert_count_true(A > 2, n = 3)
   )
+
+my_data <- data.frame(name = c("Bill", "Sam"), birthdate = c("2024-05-22", "2024-05-22"))
+my_data |>
+  dplyr::mutate(
+    birthdate =
+      dplyr::case_when(
+        assert_count_true(name == "Bill" & birthdate == "2024-05-22") ~ "2024-05-23",
+        TRUE ~ birthdate
+      )
+  )
 }

From 5b4c1fe79ea539a555c8262015e07d9d5afafdb9 Mon Sep 17 00:00:00 2001
From: Bill Denney <wdenney@humanpredictions.com>
Date: Wed, 18 Dec 2024 16:43:45 -0500
Subject: [PATCH 3/6] Move function name to `assert_count()`; add assertions
 vignette

---
 .gitignore                                    |   1 +
 DESCRIPTION                                   |   2 +-
 NAMESPACE                                     |   2 +-
 NEWS.md                                       |   2 +-
 R/{assert_count_true.R => assertions.R}       |   8 +-
 _pkgdown.yml                                  |   2 +-
 man/{assert_count_true.Rd => assert_count.Rd} |  14 +-
 tests/testthat/test-assert_count_true.R       |  16 +-
 vignettes/.gitignore                          |   2 +
 vignettes/assertions.Rmd                      | 137 ++++++++++++++++++
 10 files changed, 163 insertions(+), 23 deletions(-)
 rename R/{assert_count_true.R => assertions.R} (79%)
 rename man/{assert_count_true.Rd => assert_count.Rd} (66%)
 create mode 100644 vignettes/.gitignore
 create mode 100644 vignettes/assertions.Rmd

diff --git a/.gitignore b/.gitignore
index 9591447d..8e6f9acb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,3 +10,4 @@ docs
 Meta
 docs/
 janitor.Rproj
+inst/doc
diff --git a/DESCRIPTION b/DESCRIPTION
index 7fd04e02..e0d9ac4a 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -52,4 +52,4 @@ Config/testthat/edition: 3
 Encoding: UTF-8
 LazyData: true
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.3.1
+RoxygenNote: 7.3.2
diff --git a/NAMESPACE b/NAMESPACE
index 37444fca..c6fcc4c1 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -29,7 +29,7 @@ export(adorn_rounding)
 export(adorn_title)
 export(adorn_totals)
 export(as_tabyl)
-export(assert_count_true)
+export(assert_count)
 export(chisq.test)
 export(clean_names)
 export(compare_df_cols)
diff --git a/NEWS.md b/NEWS.md
index 0526177f..a235f0fa 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -14,7 +14,7 @@ These are all minor breaking changes resulting from enhancements and are not exp
 
 * The new function `excel_time_to_numeric()` converts times from Excel that do not have accompanying dates into a number of seconds.  (#245, thanks to **@billdenney** for the feature.)
 
-* The new function `assert_count_true()` verifies that an expected number of values are `TRUE` for quality checks in data pipelines
+* The new function `assert_count()` verifies that an expected number of values are `TRUE` for quality checks in data pipelines.
 
 ## Bug fixes
 
diff --git a/R/assert_count_true.R b/R/assertions.R
similarity index 79%
rename from R/assert_count_true.R
rename to R/assertions.R
index 37779315..f007024f 100644
--- a/R/assert_count_true.R
+++ b/R/assertions.R
@@ -1,12 +1,12 @@
 #' Verify that a vector of values has the expected number of `TRUE` values
 #'
-#' @param x A logical vecotor without `NA` values
+#' @param x A logical vector without `NA` values
 #' @param n The expected number of `TRUE` values
 #' @returns `x` if `sum(x) == n` or an informative error message otherwise
 #' @examples
 #' data.frame(A = 1:5) %>%
 #'   dplyr::mutate(
-#'     big_values = assert_count_true(A > 2, n = 3)
+#'     big_values = assert_count(A > 2, n = 3)
 #'   )
 #'
 #' my_data <- data.frame(name = c("Bill", "Sam"), birthdate = c("2024-05-22", "2024-05-22"))
@@ -14,12 +14,12 @@
 #'   dplyr::mutate(
 #'     birthdate =
 #'       dplyr::case_when(
-#'         assert_count_true(name == "Bill" & birthdate == "2024-05-22") ~ "2024-05-23",
+#'         assert_count(name == "Bill" & birthdate == "2024-05-22") ~ "2024-05-23",
 #'         TRUE ~ birthdate
 #'       )
 #'   )
 #' @export
-assert_count_true <- function(x, n = 1) {
+assert_count <- function(x, n = 1) {
   stopifnot(is.logical(x))
   if (any(is.na(x))) {
     stop(deparse(substitute(x)), " has NA values")
diff --git a/_pkgdown.yml b/_pkgdown.yml
index 7d96cb04..2038dddf 100644
--- a/_pkgdown.yml
+++ b/_pkgdown.yml
@@ -30,7 +30,7 @@ reference:
     Compare data frames columns
   contents:
   - starts_with("compare_df_cols")
-  - assert_count_true
+  - assert_count
 
 - title: Removing unnecessary columns / rows
   contents:
diff --git a/man/assert_count_true.Rd b/man/assert_count.Rd
similarity index 66%
rename from man/assert_count_true.Rd
rename to man/assert_count.Rd
index c158760e..0a88a61a 100644
--- a/man/assert_count_true.Rd
+++ b/man/assert_count.Rd
@@ -1,13 +1,13 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/assert_count_true.R
-\name{assert_count_true}
-\alias{assert_count_true}
+% Please edit documentation in R/assertions.R
+\name{assert_count}
+\alias{assert_count}
 \title{Verify that a vector of values has the expected number of \code{TRUE} values}
 \usage{
-assert_count_true(x, n = 1)
+assert_count(x, n = 1)
 }
 \arguments{
-\item{x}{A logical vecotor without \code{NA} values}
+\item{x}{A logical vector without \code{NA} values}
 
 \item{n}{The expected number of \code{TRUE} values}
 }
@@ -20,7 +20,7 @@ Verify that a vector of values has the expected number of \code{TRUE} values
 \examples{
 data.frame(A = 1:5) \%>\%
   dplyr::mutate(
-    big_values = assert_count_true(A > 2, n = 3)
+    big_values = assert_count(A > 2, n = 3)
   )
 
 my_data <- data.frame(name = c("Bill", "Sam"), birthdate = c("2024-05-22", "2024-05-22"))
@@ -28,7 +28,7 @@ my_data |>
   dplyr::mutate(
     birthdate =
       dplyr::case_when(
-        assert_count_true(name == "Bill" & birthdate == "2024-05-22") ~ "2024-05-23",
+        assert_count(name == "Bill" & birthdate == "2024-05-22") ~ "2024-05-23",
         TRUE ~ birthdate
       )
   )
diff --git a/tests/testthat/test-assert_count_true.R b/tests/testthat/test-assert_count_true.R
index 7b477070..3d10287d 100644
--- a/tests/testthat/test-assert_count_true.R
+++ b/tests/testthat/test-assert_count_true.R
@@ -1,36 +1,36 @@
-test_that("assert_count_true", {
+test_that("assert_count", {
   expect_equal(
-    assert_count_true(TRUE, 1),
+    assert_count(TRUE, 1),
     TRUE
   )
   expect_equal(
-    assert_count_true(rep(TRUE, 3), 3),
+    assert_count(rep(TRUE, 3), 3),
     rep(TRUE, 3)
   )
   my_vector <- c(rep(TRUE, 3), FALSE)
   expect_equal(
-    assert_count_true(my_vector, 3),
+    assert_count(my_vector, 3),
     my_vector
   )
   expect_error(
-    assert_count_true(NA),
+    assert_count(NA),
     regexp = "NA has NA values"
   )
   # more informative errors
   my_vector <- c(NA, TRUE)
   expect_error(
-    assert_count_true(my_vector),
+    assert_count(my_vector),
     regexp = "my_vector has NA values"
   )
   my_vector <- c(FALSE, TRUE)
   expect_error(
-    assert_count_true(my_vector, n = 2),
+    assert_count(my_vector, n = 2),
     regexp = "`my_vector` expected 2 `TRUE` values but 1 was found."
   )
   # Check grammar of error message
   my_vector <- c(TRUE, TRUE)
   expect_error(
-    assert_count_true(my_vector, n = 1),
+    assert_count(my_vector, n = 1),
     regexp = "`my_vector` expected 1 `TRUE` value but 2 were found."
   )
 })
diff --git a/vignettes/.gitignore b/vignettes/.gitignore
new file mode 100644
index 00000000..097b2416
--- /dev/null
+++ b/vignettes/.gitignore
@@ -0,0 +1,2 @@
+*.html
+*.R
diff --git a/vignettes/assertions.Rmd b/vignettes/assertions.Rmd
new file mode 100644
index 00000000..6ee31f97
--- /dev/null
+++ b/vignettes/assertions.Rmd
@@ -0,0 +1,137 @@
+---
+title: "Assertions for cleaning data"
+output: rmarkdown::html_vignette
+vignette: >
+  %\VignetteIndexEntry{Assertions for cleaning data}
+  %\VignetteEngine{knitr::rmarkdown}
+  %\VignetteEncoding{UTF-8}
+---
+
+```{r, include = FALSE}
+knitr::opts_chunk$set(
+  collapse = TRUE,
+  comment = "#>"
+)
+```
+
+# Assertions for cleaning data
+
+Part of cleaning data is asserting that the data are what you expect before
+changing any values. `janitor` provides an assertion to enable data
+verification before making changes; more assertions may be added in the future.
+
+```{r setup}
+library(janitor)
+library(dplyr)
+```
+
+## `assert_count()` - Verify the number of `TRUE` values
+
+`assert_count()` will verify that the number of `TRUE` values is the expected
+number. It is useful when data may change over time and you want to be sure that
+you are changing only data that you intend to change.
+
+For example, you are given a data set with test scores for several students.
+Some of the scores are missing.
+
+```{r raw-v1}
+raw <-
+  data.frame(
+    student_id = c(123, 124, 125, 126),
+    test_score = c(NA, 93, NA, 82)
+  )
+```
+
+When you first receive the data, you're told separately that student 123 has a
+score of 84 and 125 has a score of 91. You want to verify that you are finding
+the right rows to replace and that you replace them.
+
+```{r clean-v1-mistake}
+clean_mistake <-
+  raw %>%
+  mutate(
+    test_score =
+      case_when(
+        student_id == 124 & is.na(test_score) ~ 84,
+        student_id == 125 & is.na(test_score) ~ 91,
+        TRUE ~ test_score
+      )
+  )
+```
+
+Because of a bug in the code, the score for `student_id == 123` was not replaced.
+
+```{r clean-v1-mistake-table}
+clean_mistake
+```
+
+Using `assert_count()`, you would find this error because of an error raised by
+`assert_count()` in the pipeline.
+
+```{r clean_assert}
+try({
+clean_assert <-
+  raw %>%
+  mutate(
+    test_score =
+      case_when(
+        assert_count(student_id == 124 & is.na(test_score)) ~ 84,
+        assert_count(student_id == 125 & is.na(test_score)) ~ 91,
+        TRUE ~ test_score
+      )
+  )
+})
+```
+
+After fixing the bug so that the first condition uses `student_id == 123`
+instead of `student_id == 124`, you now get the expected result.
+
+```{r clean_assert_fixed}
+clean_assert <-
+  raw %>%
+  mutate(
+    test_score =
+      case_when(
+        assert_count(student_id == 123 & is.na(test_score)) ~ 84,
+        assert_count(student_id == 125 & is.na(test_score)) ~ 91,
+        TRUE ~ test_score
+      )
+  )
+
+# New result
+clean_assert
+
+# Original data
+raw
+```
+
+### Changing data
+
+Another way that `assert_count()` can help is verifying that your code notifies
+you if your data changes in an important way. Using the example before, you may
+get a new raw data set (`raw_v2`) that has some of the `test_score` values
+added. They may be different than what you were told before.
+
+Running the same code on the new data will give you an informative error telling
+you what to look into.
+
+```{r raw_v2}
+raw_v2 <-
+  data.frame(
+    student_id = c(123, 124, 125, 126),
+    test_score = c(90, 93, NA, 82)
+  )
+
+try({
+clean_assert <-
+  raw_v2 %>%
+  mutate(
+    test_score =
+      case_when(
+        assert_count(student_id == 123 & is.na(test_score)) ~ 84,
+        assert_count(student_id == 125 & is.na(test_score)) ~ 91,
+        TRUE ~ test_score
+      )
+  )
+})
+```

From 140d619f63b98c2309ea0f1258fdfcca8c5256f1 Mon Sep 17 00:00:00 2001
From: Bill Denney <wdenney@humanpredictions.com>
Date: Wed, 18 Dec 2024 16:46:50 -0500
Subject: [PATCH 4/6] Clean up filename

---
 tests/testthat/{test-assert_count_true.R => test-assertions.R} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename tests/testthat/{test-assert_count_true.R => test-assertions.R} (100%)

diff --git a/tests/testthat/test-assert_count_true.R b/tests/testthat/test-assertions.R
similarity index 100%
rename from tests/testthat/test-assert_count_true.R
rename to tests/testthat/test-assertions.R

From 0edd037ddba5313158e1bb655febd5dd31a6f8bc Mon Sep 17 00:00:00 2001
From: billdenney <billdenney@users.noreply.github.com>
Date: Wed, 18 Dec 2024 21:48:42 +0000
Subject: [PATCH 5/6] Style code (GHA)

---
 R/clean_names.R                   | 15 ++++++------
 tests/testthat/test-clean-names.R | 20 ++++++++--------
 vignettes/assertions.Rmd          | 40 +++++++++++++++----------------
 3 files changed, 37 insertions(+), 38 deletions(-)

diff --git a/R/clean_names.R b/R/clean_names.R
index 388144d8..abdcd73c 100644
--- a/R/clean_names.R
+++ b/R/clean_names.R
@@ -33,7 +33,7 @@
 #'   support using `clean_names()` on `sf` and `tbl_graph` (from
 #'   `tidygraph`) objects as well as on database connections through
 #'   `dbplyr`. For cleaning other named objects like named lists
-#'   and vectors, use `make_clean_names()`. When `set_labels` is set to `TRUE`, the old names, 
+#'   and vectors, use `make_clean_names()`. When `set_labels` is set to `TRUE`, the old names,
 #'   stored as column labels, can be restored using `sjlabelled::label_to_colnames()`.
 #'
 #' @export
@@ -83,14 +83,13 @@ clean_names.default <- function(dat, ..., set_labels = FALSE) {
   if (is.null(names(dat))) {
     dimnames(dat) <- lapply(dimnames(dat), make_clean_names, ...)
   } else {
-    if (set_labels){
+    if (set_labels) {
       old_names <- names(dat)
-      for (i in seq_along(old_names)){
+      for (i in seq_along(old_names)) {
         attr(dat[[i]], "label") <- old_names[[i]]
       }
     }
     names(dat) <- make_clean_names(names(dat), ...)
-    
   }
   dat
 }
@@ -112,9 +111,9 @@ clean_names.sf <- function(dat, ..., set_labels = FALSE) {
   sf_cleaned <- make_clean_names(sf_names[cols_to_rename], ...)
   # rename original df
   names(dat)[cols_to_rename] <- sf_cleaned
-  
-  if(set_labels){
-    for (i in seq_along(sf_names[cols_to_rename])){
+
+  if (set_labels) {
+    for (i in seq_along(sf_names[cols_to_rename])) {
       attr(dat[[i]], "label") <- sf_names[[i]]
     }
   }
@@ -131,7 +130,7 @@ clean_names.tbl_graph <- function(dat, ...) {
       call. = FALSE
     )
   } # nocov end
-  
+
   dplyr::rename_all(dat, .funs = make_clean_names, ...)
 }
 
diff --git a/tests/testthat/test-clean-names.R b/tests/testthat/test-clean-names.R
index 15bb942a..bc4e704d 100644
--- a/tests/testthat/test-clean-names.R
+++ b/tests/testthat/test-clean-names.R
@@ -190,14 +190,14 @@ test_that("labels are created in default method (feature request #563)", {
   dat_df <- dplyr::tibble(`a a` = c(11, 22), `b b` = c(2, 3))
   dat_df_clean_labels <- clean_names(dat_df, set_labels = TRUE)
   dat_df_clean <- clean_names(dat_df)
-  
-  for (i in seq_along(names(dat_df))){
+
+  for (i in seq_along(names(dat_df))) {
     # check that old names are saved as labels when set_labels is TRUE
     expect_equal(attr(dat_df_clean_labels[[i]], "label"), names(dat_df)[[i]])
     # check that old names are not stored if set_labels is not TRUE
     expect_null(attr(dat_df_clean[[i]], "label"))
-    }
-  
+  }
+
   # expect names are always cleaned
   expect_equal(names(dat_df_clean), c("a_a", "b_b"))
   expect_equal(names(dat_df_clean_labels), c("a_a", "b_b"))
@@ -605,19 +605,19 @@ test_that("Tests for cases beyond default snake for sf objects", {
 
 test_that("labels are created in sf method (feature request #563)", {
   skip_if_not_installed("sf")
-  
+
   dat_df <- dplyr::tibble(`a a` = c(11, 22), `b b` = c(2, 3))
   dat_sf <- dat_df
-  dat_sf$x <- c(1,2)
-  dat_sf$y <- c(1,2) 
+  dat_sf$x <- c(1, 2)
+  dat_sf$y <- c(1, 2)
   dat_sf <- sf::st_as_sf(dat_sf, coords = c("x", "y"))
   dat_sf_clean_labels <- clean_names(dat_sf, set_labels = TRUE)
   dat_sf_clean <- clean_names(dat_sf)
-  
-  for (i in seq_along(names(dat_df))){
+
+  for (i in seq_along(names(dat_df))) {
     # check that old names are saved as labels when set_labels is TRUE
     expect_equal(attr(dat_sf_clean_labels[[i]], "label"), names(dat_sf)[[i]])
-    
+
     # check that old names are not stored if set_labels is not TRUE
     expect_null(attr(dat_sf_clean[[i]], "label"))
   }
diff --git a/vignettes/assertions.Rmd b/vignettes/assertions.Rmd
index 6ee31f97..7099cf98 100644
--- a/vignettes/assertions.Rmd
+++ b/vignettes/assertions.Rmd
@@ -70,16 +70,16 @@ Using `assert_count()`, you would find this error because of an error raised by
 
 ```{r clean_assert}
 try({
-clean_assert <-
-  raw %>%
-  mutate(
-    test_score =
-      case_when(
-        assert_count(student_id == 124 & is.na(test_score)) ~ 84,
-        assert_count(student_id == 125 & is.na(test_score)) ~ 91,
-        TRUE ~ test_score
-      )
-  )
+  clean_assert <-
+    raw %>%
+    mutate(
+      test_score =
+        case_when(
+          assert_count(student_id == 124 & is.na(test_score)) ~ 84,
+          assert_count(student_id == 125 & is.na(test_score)) ~ 91,
+          TRUE ~ test_score
+        )
+    )
 })
 ```
 
@@ -123,15 +123,15 @@ raw_v2 <-
   )
 
 try({
-clean_assert <-
-  raw_v2 %>%
-  mutate(
-    test_score =
-      case_when(
-        assert_count(student_id == 123 & is.na(test_score)) ~ 84,
-        assert_count(student_id == 125 & is.na(test_score)) ~ 91,
-        TRUE ~ test_score
-      )
-  )
+  clean_assert <-
+    raw_v2 %>%
+    mutate(
+      test_score =
+        case_when(
+          assert_count(student_id == 123 & is.na(test_score)) ~ 84,
+          assert_count(student_id == 125 & is.na(test_score)) ~ 91,
+          TRUE ~ test_score
+        )
+    )
 })
 ```

From f9ec0eb03b705b56dbe8de9afa533e4a6e57b6e6 Mon Sep 17 00:00:00 2001
From: Bill Denney <wdenney@humanpredictions.com>
Date: Wed, 18 Dec 2024 16:58:54 -0500
Subject: [PATCH 6/6] Fix spelling issues, update janitor.md

---
 DESCRIPTION           |   2 +
 NEWS.md               |   8 +--
 R/round_half_up.R     |   2 +-
 vignettes/janitor.Rmd |   2 +-
 vignettes/janitor.md  | 155 +++++++++++++++++++-----------------------
 vignettes/tabyls.md   |   6 +-
 6 files changed, 82 insertions(+), 93 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 715ff2f7..ff84bb41 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -44,6 +44,7 @@ Suggests:
     rmarkdown,
     RSQLite,
     sf,
+    spelling,
     testthat (>= 3.0.0),
     tibble,
     tidygraph
@@ -54,3 +55,4 @@ Encoding: UTF-8
 LazyData: true
 Roxygen: list(markdown = TRUE)
 RoxygenNote: 7.3.2
+Language: en-US
diff --git a/NEWS.md b/NEWS.md
index b6a2844f..26f469ee 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -106,7 +106,7 @@ These are all minor breaking changes resulting from enhancements and are not exp
 
 ## New features
 
-* The `adorn_totals()` function now accepts the special argument `fill = NA`, which will insert a class-appropriate `NA` value into each column that isn't being totaled.  This preserves the class of each column; previously they were all convered to character. (thanks **@hamstr147** for implementing in #404 and **@ymer** for reporting in #298).
+* The `adorn_totals()` function now accepts the special argument `fill = NA`, which will insert a class-appropriate `NA` value into each column that isn't being totaled.  This preserves the class of each column; previously they were all converted to character. (thanks **@hamstr147** for implementing in #404 and **@ymer** for reporting in #298).
 
 * `adorn_totals()` now takes the value of `"both"` for the `where` argument.  That is, `adorn_totals("both")` is a shorter version of `adorn_totals(c("col", "row"))`.  (#362, thanks to **@svgsstats** for implementing and **@sfd99** for suggesting).
 
@@ -130,7 +130,7 @@ These are all minor breaking changes resulting from enhancements and are not exp
 
 * A call to make a 3-way `tabyl()` now succeeds when the first variable is of class `ordered` (#386)
 
-* If a totals row and/or column is present on a tabyl as a result of `adorn_totals()`, the functions `chisq.test()` and `fisher.test()` drop the totals and print a warning before proceding with the calculations (#385).
+* If a totals row and/or column is present on a tabyl as a result of `adorn_totals()`, the functions `chisq.test()` and `fisher.test()` drop the totals and print a warning before proceeding with the calculations (#385).
 
 # janitor 2.0.1 (2020-04-12)
 
@@ -276,7 +276,7 @@ This builds on the original functionality of janitor, with similar-but-improved
 
 ### A fully-overhauled `tabyl`
 
-`tabyl()` is now a single function that can count combinations of one, two, or three variables, ala base R's `table()`.  The resulting `tabyl` data.frames can be manipulated and formatted using a family of `adorn_` functions.  See the [tabyls vignette](https://sfirke.github.io/janitor/articles/tabyls.html) for more.
+`tabyl()` is now a single function that can count combinations of one, two, or three variables, a la base R's `table()`.  The resulting `tabyl` data.frames can be manipulated and formatted using a family of `adorn_` functions.  See the [tabyls vignette](https://sfirke.github.io/janitor/articles/tabyls.html) for more.
 
 The now-redundant legacy functions `crosstab()` and `adorn_crosstab()` have been deprecated, but remain in the package for now.  Existing code that relies on the version of `tabyl` present in janitor versions <= 0.3.1 will break if the `sort` argument was used, as that argument no longer exists in `tabyl` (use `dplyr::arrange()` instead).
 
@@ -292,7 +292,7 @@ No further changes are planned to `clean_names()` and its results should be stab
 
 ## Major features
 
-- `clean_names()` transliterates accented letters, e.g., `çãüœ` becomes `cauoe` [(#120)](https://github.com/sfirke/janitor/issues/120).  Thanks to **@fernandovmacedo**.
+- `clean_names()` transliterates accented letters, e.g., `çãüœ` becomes `cauoe` [(#120)](https://github.com/sfirke/janitor/issues/120).  Thanks to **@fernandovmacedo**.
 
 - `clean_names()` offers multiple options for variable name styling.  In addition to `snake_case` output you can select `smallCamelCase`, `BigCamelCase`, `ALL_CAPS` and others. [(#131)](https://github.com/sfirke/janitor/issues/131).
   - Thanks to **@tazinho**, who wrote the [snakecase](https://github.com/Tazinho/snakecase/) package that janitor depends on to do this, as well as the patch to incorporate it into `clean_names()`.  And thanks to **@maelle** for proposing this feature.
diff --git a/R/round_half_up.R b/R/round_half_up.R
index c4f63006..a7ba9524 100644
--- a/R/round_half_up.R
+++ b/R/round_half_up.R
@@ -1,4 +1,4 @@
-#' Round a numeric vector; halves will be rounded up, ala Microsoft Excel.
+#' Round a numeric vector; halves will be rounded up, a la Microsoft Excel.
 #'
 #' @description
 #' In base R `round()`, halves are rounded to even, e.g., 12.5 and
diff --git a/vignettes/janitor.Rmd b/vignettes/janitor.Rmd
index 5e64c70d..4d76deff 100644
--- a/vignettes/janitor.Rmd
+++ b/vignettes/janitor.Rmd
@@ -124,7 +124,7 @@ Smaller functions for use in particular situations.  More human-readable than th
 
 ### Manipulate vectors of names with `make_clean_names()`
 
-Like base R's `make.names()`, but with the stylings and case choice of the long-time janitor function `clean_names()`.  While `clean_names()` is still offered for use in  data.frame pipeline with `%>%`, `make_clean_names()` allows for more general usage, e.g., on a vector.
+Like base R's `make.names()`, but with the styling and case choice of the long-time janitor function `clean_names()`.  While `clean_names()` is still offered for use in  data.frame pipeline with `%>%`, `make_clean_names()` allows for more general usage, e.g., on a vector.
 
 It can also be used as an argument to `.name_repair` in the newest version of `tibble::as_tibble`:
 ```{r}
diff --git a/vignettes/janitor.md b/vignettes/janitor.md
index 3627d7b3..2ee3b61f 100644
--- a/vignettes/janitor.md
+++ b/vignettes/janitor.md
@@ -1,68 +1,45 @@
 Overview of janitor functions
 ================
-2023-02-02
-
-- <a href="#major-functions" id="toc-major-functions">Major functions</a>
-  - <a href="#cleaning" id="toc-cleaning">Cleaning</a>
-    - <a href="#clean-dataframe-names-with-clean_names"
-      id="toc-clean-dataframe-names-with-clean_names">Clean data.frame names
-      with <code>clean_names()</code></a>
-    - <a href="#do-those-dataframes-actually-contain-the-same-columns"
-      id="toc-do-those-dataframes-actually-contain-the-same-columns">Do those
-      data.frames actually contain the same columns?</a>
-  - <a href="#exploring" id="toc-exploring">Exploring</a>
-    - <a href="#tabyl---a-better-version-of-table"
-      id="toc-tabyl---a-better-version-of-table"><code>tabyl()</code> - a
-      better version of <code>table()</code></a>
-    - <a
-      href="#explore-records-with-duplicated-values-for-specific-combinations-of-variables-with-get_dupes"
-      id="toc-explore-records-with-duplicated-values-for-specific-combinations-of-variables-with-get_dupes">Explore
-      records with duplicated values for specific combinations of variables
-      with <code>get_dupes()</code></a>
-    - <a href="#explore-relationships-between-columns-with-get_one_to_one"
-      id="toc-explore-relationships-between-columns-with-get_one_to_one">Explore
-      relationships between columns with <code>get_one_to_one()</code></a>
-- <a href="#minor-functions" id="toc-minor-functions">Minor functions</a>
-  - <a href="#cleaning-1" id="toc-cleaning-1">Cleaning</a>
-    - <a href="#manipulate-vectors-of-names-with-make_clean_names"
-      id="toc-manipulate-vectors-of-names-with-make_clean_names">Manipulate
-      vectors of names with <code>make_clean_names()</code></a>
-    - <a href="#validate-that-a-column-has-a-single_value-per-group"
-      id="toc-validate-that-a-column-has-a-single_value-per-group">Validate
-      that a column has a <code>single_value()</code> per group</a>
-    - <a href="#remove_empty-rows-and-columns"
-      id="toc-remove_empty-rows-and-columns"><code>remove_empty()</code> rows
-      and columns</a>
-    - <a href="#remove_constant-columns"
-      id="toc-remove_constant-columns"><code>remove_constant()</code>
-      columns</a>
-    - <a href="#directionally-consistent-rounding-behavior-with-round_half_up"
-      id="toc-directionally-consistent-rounding-behavior-with-round_half_up">Directionally-consistent
-      rounding behavior with <code>round_half_up()</code></a>
-    - <a
-      href="#round-decimals-to-precise-fractions-of-a-given-denominator-with-round_to_fraction"
-      id="toc-round-decimals-to-precise-fractions-of-a-given-denominator-with-round_to_fraction">Round
-      decimals to precise fractions of a given denominator with
-      <code>round_to_fraction()</code></a>
-    - <a href="#fix-dates-stored-as-serial-numbers-with-excel_numeric_to_date"
-      id="toc-fix-dates-stored-as-serial-numbers-with-excel_numeric_to_date">Fix
-      dates stored as serial numbers with
-      <code>excel_numeric_to_date()</code></a>
-    - <a href="#convert-a-mix-of-date-and-datetime-formats-to-date"
-      id="toc-convert-a-mix-of-date-and-datetime-formats-to-date">Convert a
-      mix of date and datetime formats to date</a>
-    - <a href="#elevate-column-names-stored-in-a-dataframe-row"
-      id="toc-elevate-column-names-stored-in-a-dataframe-row">Elevate column
-      names stored in a data.frame row</a>
-    - <a href="#find-the-header-row-buried-within-a-messy-dataframe"
-      id="toc-find-the-header-row-buried-within-a-messy-dataframe">Find the
-      header row buried within a messy data.frame</a>
-  - <a href="#exploring-1" id="toc-exploring-1">Exploring</a>
-    - <a
-      href="#count-factor-levels-in-groups-of-high-medium-and-low-with-top_levels"
-      id="toc-count-factor-levels-in-groups-of-high-medium-and-low-with-top_levels">Count
-      factor levels in groups of high, medium, and low with
-      <code>top_levels()</code></a>
+2024-12-18
+
+- [Major functions](#major-functions)
+  - [Cleaning](#cleaning)
+    - [Clean data.frame names with
+      `clean_names()`](#clean-dataframe-names-with-clean_names)
+    - [Do those data.frames actually contain the same
+      columns?](#do-those-dataframes-actually-contain-the-same-columns)
+  - [Exploring](#exploring)
+    - [`tabyl()` - a better version of
+      `table()`](#tabyl---a-better-version-of-table)
+    - [Explore records with duplicated values for specific combinations
+      of variables with
+      `get_dupes()`](#explore-records-with-duplicated-values-for-specific-combinations-of-variables-with-get_dupes)
+    - [Explore relationships between columns with
+      `get_one_to_one()`](#explore-relationships-between-columns-with-get_one_to_one)
+- [Minor functions](#minor-functions)
+  - [Cleaning](#cleaning-1)
+    - [Manipulate vectors of names with
+      `make_clean_names()`](#manipulate-vectors-of-names-with-make_clean_names)
+    - [Validate that a column has a `single_value()` per
+      group](#validate-that-a-column-has-a-single_value-per-group)
+    - [`remove_empty()` rows and
+      columns](#remove_empty-rows-and-columns)
+    - [`remove_constant()` columns](#remove_constant-columns)
+    - [Directionally-consistent rounding behavior with
+      `round_half_up()`](#directionally-consistent-rounding-behavior-with-round_half_up)
+    - [Round decimals to precise fractions of a given denominator with
+      `round_to_fraction()`](#round-decimals-to-precise-fractions-of-a-given-denominator-with-round_to_fraction)
+    - [Fix dates stored as serial numbers with
+      `excel_numeric_to_date()`](#fix-dates-stored-as-serial-numbers-with-excel_numeric_to_date)
+    - [Convert a mix of date and datetime formats to
+      date](#convert-a-mix-of-date-and-datetime-formats-to-date)
+    - [Elevate column names stored in a data.frame
+      row](#elevate-column-names-stored-in-a-dataframe-row)
+    - [Find the header row buried within a messy
+      data.frame](#find-the-header-row-buried-within-a-messy-dataframe)
+  - [Exploring](#exploring-1)
+    - [Count factor levels in groups of high, medium, and low with
+      `top_levels()`](#count-factor-levels-in-groups-of-high-medium-and-low-with-top_levels)
 
 The janitor functions expedite the initial data exploration and cleaning
 that comes with any new data set. This catalog describes the usage for
@@ -78,7 +55,7 @@ Functions for everyday use.
 
 Call this function every time you read data.
 
-It works in a `%>%` pipeline, and handles problematic variable names,
+It works in a `%>%` pipeline and handles problematic variable names,
 especially those that are so well-preserved by `readxl::read_excel()`
 and `readr::read_csv()`.
 
@@ -94,8 +71,10 @@ and `readr::read_csv()`.
 ``` r
 # Create a data.frame with dirty names
 test_df <- as.data.frame(matrix(ncol = 6))
-names(test_df) <- c("firstName", "ábc@!*", "% successful (2009)",
-                    "REPEAT VALUE", "REPEAT VALUE", "")
+names(test_df) <- c(
+  "firstName", "ábc@!*", "% successful (2009)",
+  "REPEAT VALUE", "REPEAT VALUE", ""
+)
 ```
 
 Clean the variable names, returning a data.frame:
@@ -111,8 +90,8 @@ Compare to what base R produces:
 
 ``` r
 make.names(names(test_df))
-#> [1] "firstName"            "ábc..."               "X..successful..2009." "REPEAT.VALUE"         "REPEAT.VALUE"        
-#> [6] "X"
+#> [1] "firstName"            "ábc..."               "X..successful..2009."
+#> [4] "REPEAT.VALUE"         "REPEAT.VALUE"         "X"
 ```
 
 This function is powered by the underlying exported function
@@ -229,10 +208,11 @@ sets of one-to-one clusters:
 
 ``` r
 library(dplyr)
-starwars[1:4,] %>%
+starwars[1:4, ] %>%
   get_one_to_one()
 #> [[1]]
-#> [1] "name"       "height"     "mass"       "skin_color" "birth_year" "films"     
+#> [1] "name"       "height"     "mass"       "skin_color" "birth_year"
+#> [6] "films"     
 #> 
 #> [[2]]
 #> [1] "hair_color" "starships" 
@@ -250,7 +230,7 @@ than the equivalent code they replace.
 
 ### Manipulate vectors of names with `make_clean_names()`
 
-Like base R’s `make.names()`, but with the stylings and case choice of
+Like base R’s `make.names()`, but with the styling and case choice of
 the long-time janitor function `clean_names()`. While `clean_names()` is
 still offered for use in data.frame pipeline with `%>%`,
 `make_clean_names()` allows for more general usage, e.g., on a vector.
@@ -273,7 +253,7 @@ tibble::as_tibble(iris, .name_repair = janitor::make_clean_names)
 #>  8          5           3.4          1.5         0.2 setosa 
 #>  9          4.4         2.9          1.4         0.2 setosa 
 #> 10          4.9         3.1          1.5         0.1 setosa 
-#> # … with 140 more rows
+#> # ℹ 140 more rows
 ```
 
 ### Validate that a column has a `single_value()` per group
@@ -290,7 +270,8 @@ where it should not:
 ``` r
 not_one_to_one <- data.frame(
   X = rep(1:3, each = 2),
-  Y = c(rep(1:2, each = 2), 1:2))
+  Y = c(rep(1:2, each = 2), 1:2)
+)
 
 not_one_to_one
 #>   X Y
@@ -303,12 +284,13 @@ not_one_to_one
 
 # throws informative error:
 try(not_one_to_one %>%
-      dplyr::group_by(X) %>%
-      dplyr::mutate(
-        Z = single_value(Y, info = paste("Calculating Z for group X =", X)))
-      )
+  dplyr::group_by(X) %>%
+  dplyr::mutate(
+    Z = single_value(Y, info = paste("Calculating Z for group X =", X))
+  ))
 #> Error in dplyr::mutate(., Z = single_value(Y, info = paste("Calculating Z for group X =",  : 
-#>   ℹ In argument: `Z = single_value(Y, info = paste("Calculating Z for group X =", X))`.
+#>   ℹ In argument: `Z = single_value(Y, info = paste("Calculating Z for
+#>   group X =", X))`.
 #> ℹ In group 3: `X = 3`.
 #> Caused by error in `single_value()`:
 #> ! More than one (2) value found (1, 2): Calculating Z for group X = 3: Calculating Z for group X = 3
@@ -320,9 +302,11 @@ Does what it says. For cases like cleaning Excel files that contain
 empty rows and columns after being read into R.
 
 ``` r
-q <- data.frame(v1 = c(1, NA, 3),
-                v2 = c(NA, NA, NA),
-                v3 = c("a", NA, "b"))
+q <- data.frame(
+  v1 = c(1, NA, 3),
+  v2 = c(NA, NA, NA),
+  v3 = c("a", NA, "b")
+)
 q %>%
   remove_empty(c("rows", "cols"))
 #>   v1 v3
@@ -419,8 +403,10 @@ names of the data.frame and optionally (by default) remove the row in
 which names were stored and/or the rows above it.
 
 ``` r
-dirt <- data.frame(X_1 = c(NA, "ID", 1:3),
-           X_2 = c(NA, "Value", 4:6))
+dirt <- data.frame(
+  X_1 = c(NA, "ID", 1:3),
+  X_2 = c(NA, "Value", 4:6)
+)
 
 row_to_names(dirt, 2)
 #>   ID Value
@@ -454,7 +440,8 @@ grouped into head/middle/tail groups.
 
 ``` r
 f <- factor(c("strongly agree", "agree", "neutral", "neutral", "disagree", "strongly agree"),
-            levels = c("strongly agree", "agree", "neutral", "disagree", "strongly disagree"))
+  levels = c("strongly agree", "agree", "neutral", "disagree", "strongly disagree")
+)
 top_levels(f)
 #>                            f n   percent
 #>        strongly agree, agree 3 0.5000000
diff --git a/vignettes/tabyls.md b/vignettes/tabyls.md
index ea526931..262a1801 100644
--- a/vignettes/tabyls.md
+++ b/vignettes/tabyls.md
@@ -254,7 +254,7 @@ humans %>%
   function or using janitor’s `round_half_up()` to round all ties up
   ([thanks,
   StackOverflow](https://stackoverflow.com/a/12688836/4470365)).
-  - e.g., round 10.5 up to 11, consistent with Excel’s tie-breaking
+  - e.g., round 10.5 up to 11, consistent with Excel's tie-breaking
     behavior.
     - This contrasts with rounding 10.5 down to 10 as in base R’s
       `round(10.5)`.
@@ -263,7 +263,7 @@ humans %>%
     `adorn_pct_formatting()`; these two functions should not be called
     together.
 - **`adorn_ns()`**: add Ns to a tabyl. These can be drawn from the
-  tabyl’s underlying counts, which are attached to the tabyl as
+  tabyl's underlying counts, which are attached to the tabyl as
   metadata, or they can be supplied by the user.
 - **`adorn_title()`**: add a title to a tabyl (or other data.frame).
   Options include putting the column title in a new row on top of the
@@ -427,7 +427,7 @@ comparison %>%
 #>     Total 100.0% (3,000) 100.0% (3,000) 100.0% (6,000)
 ```
 
-Now we format them to insert the thousands commas. A tabyl’s raw Ns are
+Now we format them to insert the thousands commas. A tabyl's raw Ns are
 stored in its `"core"` attribute. Here we retrieve those with `attr()`,
 then apply the base R function `format()` to all numeric columns.
 Lastly, we append these Ns using `adorn_ns()`.