diff --git a/DESCRIPTION b/DESCRIPTION
index 715ff2f7..b581b4ce 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -12,11 +12,7 @@ Authors@R: c(
person("Josep", family = "Pueyo-Ros", email = "josep.pueyo@udg.edu", role = "ctb")
)
Description: The main janitor functions can: perfectly format data.frame
- column names; provide quick counts of variable combinations (i.e.,
- frequency tables and crosstabs); and explore duplicate records. Other
- janitor functions nicely format the tabulation results. These
- tabulate-and-report functions approximate popular features of SPSS and
- Microsoft Excel. This package follows the principles of the
+ column names and explore duplicate records. This package follows the principles of the
"tidyverse" and works well with the pipe function %>%. janitor was
built with beginning-to-intermediate R users in mind and is optimized
for user-friendliness.
@@ -50,7 +46,6 @@ Suggests:
VignetteBuilder:
knitr
Config/testthat/edition: 3
-Encoding: UTF-8
LazyData: true
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.3.2
diff --git a/NAMESPACE b/NAMESPACE
index 1461f757..c373ceab 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,7 +1,5 @@
# Generated by roxygen2: do not edit by hand
-S3method(chisq.test,default)
-S3method(chisq.test,tabyl)
S3method(clean_names,default)
S3method(clean_names,sf)
S3method(clean_names,tbl_graph)
@@ -13,35 +11,17 @@ S3method(excel_time_to_numeric,POSIXlt)
S3method(excel_time_to_numeric,character)
S3method(excel_time_to_numeric,logical)
S3method(excel_time_to_numeric,numeric)
-S3method(fisher.test,default)
-S3method(fisher.test,tabyl)
-S3method(print,tabyl)
-S3method(tabyl,data.frame)
-S3method(tabyl,default)
export("%>%")
-export(add_totals_col)
-export(add_totals_row)
-export(adorn_crosstab)
-export(adorn_ns)
-export(adorn_pct_formatting)
-export(adorn_percentages)
-export(adorn_rounding)
-export(adorn_title)
-export(adorn_totals)
-export(as_tabyl)
-export(chisq.test)
export(clean_names)
export(compare_df_cols)
export(compare_df_cols_same)
export(convert_to_NA)
export(convert_to_date)
export(convert_to_datetime)
-export(crosstab)
export(describe_class)
export(excel_numeric_to_date)
export(excel_time_to_numeric)
export(find_header)
-export(fisher.test)
export(get_dupes)
export(get_one_to_one)
export(make_clean_names)
@@ -56,9 +36,6 @@ export(row_to_names)
export(sas_numeric_to_date)
export(signif_half_up)
export(single_value)
-export(tabyl)
-export(top_levels)
-export(untabyl)
export(use_first_valid_of)
importFrom(lubridate,as_date)
importFrom(lubridate,as_datetime)
diff --git a/R/adorn_ns.R b/R/adorn_ns.R
deleted file mode 100644
index 36ed4285..00000000
--- a/R/adorn_ns.R
+++ /dev/null
@@ -1,164 +0,0 @@
-#' Add underlying Ns to a tabyl displaying percentages.
-#'
-#' This function adds back the underlying Ns to a `tabyl` whose percentages were
-#' calculated using [adorn_percentages()], to display the Ns and percentages together.
-#' You can also call it on a non-tabyl data.frame to which you wish to append Ns.
-#'
-#' @param dat A data.frame of class `tabyl` that has had `adorn_percentages` and/or
-#' `adorn_pct_formatting` called on it. If given a list of data.frames,
-#' this function will apply itself to each data.frame in the list (designed for 3-way `tabyl` lists).
-#' @param position Should the N go in the front, or in the rear, of the percentage?
-#' @param ns The Ns to append. The default is the "core" attribute of the input tabyl
-#' `dat`, where the original Ns of a two-way `tabyl` are stored. However, if your Ns
-#' are stored somewhere else, or you need to customize them beyond what can be done
-#' with `format_func`, you can supply them here.
-#' @param format_func A formatting function to run on the Ns. Consider defining
-#' with [base::format()].
-#' @param ... Columns to adorn. This takes a tidyselect specification. By default,
-#' all columns are adorned except for the first column and columns not of class
-#' `numeric`, but this allows you to manually specify which columns should be adorned,
-#' for use on a data.frame that does not result from a call to `tabyl`.
-#'
-#' @return A `data.frame` with Ns appended
-#' @export
-#' @examples
-#' mtcars %>%
-#' tabyl(am, cyl) %>%
-#' adorn_percentages("col") %>%
-#' adorn_pct_formatting() %>%
-#' adorn_ns(position = "front")
-#'
-#' # Format the Ns with a custom format_func:
-#' set.seed(1)
-#' bigger_dat <- data.frame(
-#' sex = rep(c("m", "f"), 3000),
-#' age = round(runif(3000, 1, 102), 0)
-#' )
-#' bigger_dat$age_group <- cut(bigger_dat$age, quantile(bigger_dat$age, c(0, 1 / 3, 2 / 3, 1)))
-#'
-#' bigger_dat %>%
-#' tabyl(age_group, sex, show_missing_levels = FALSE) %>%
-#' adorn_totals(c("row", "col")) %>%
-#' adorn_percentages("col") %>%
-#' adorn_pct_formatting(digits = 1) %>%
-#' adorn_ns(format_func = function(x) format(x, big.mark = ".", decimal.mark = ","))
-
-#' # Control the columns to be adorned with the ... variable selection argument
-#' # If using only the ... argument, you can use empty commas as shorthand
-#' # to supply the default values to the preceding arguments:
-#'
-#' cases <- data.frame(
-#' region = c("East", "West"),
-#' year = 2015,
-#' recovered = c(125, 87),
-#' died = c(13, 12)
-#' )
-#'
-#' cases %>%
-#' adorn_percentages("col",,recovered:died) %>%
-#' adorn_pct_formatting(,,,,,recovered:died) %>%
-#' adorn_ns(,,,recovered:died)
-#'
-adorn_ns <- function(dat, position = "rear", ns = attr(dat, "core"), format_func = function(x) {
- format(x, big.mark = ",")
- }, ...) {
- # if input is a list, call purrr::map to recursively apply this function to each data.frame
- if (is.list(dat) && !is.data.frame(dat)) {
- purrr::map(dat, adorn_ns, position) # okay not to pass ns and allow for static Ns, b/c one size fits all for each list entry doesn't make sense for Ns.
- } else {
- ns_provided <- !missing(ns)
-
- # catch bad inputs
- if (!is.data.frame(dat)) {
- stop("adorn_ns() must be called on a data.frame or list of data.frames")
- }
-
- rlang::arg_match0(position, c("front", "rear"))
-
- if (is.null(ns)) {
- stop("argument \"ns\" cannot be null; if not calling adorn_ns() on a data.frame of class \"tabyl\", pass your own value for ns")
- }
- # If ns argument is not the default "core" attribute, validate that it's a data.frame and has correct right dimensions
- if (!is.data.frame(ns)) {
- stop("if supplying a value to the ns argument, it must be of class data.frame")
- }
- if ("one_way" %in% attr(dat, "tabyl_type")) {
- warning("adorn_ns() is meant to be called on a two_way tabyl; consider combining columns of a one_way tabyl with tidyr::unite()")
- }
-
- attrs <- attributes(dat) # save these to re-append later
- custom_ns_supplied <- !(identical(ns, attr(dat, "core")))
-
- if (custom_ns_supplied & !identical(dim(ns), dim(dat))) { # user-supplied Ns must include values for totals row/col if present
- stop("if supplying your own data.frame of Ns to append, its dimensions must match those of the data.frame in the \"dat\" argument")
- }
-
- # If appending the default Ns from the core, and there are totals rows/cols, append those values to the Ns table
- # Custom inputs to ns argument will need to calculate & format their own totals row/cols
- if (!custom_ns_supplied) {
- if (!is.null(attr(dat, "totals"))) { # add totals row/col to core for pasting, if applicable
- ns <- adorn_totals(ns, attr(dat, "totals"))
- ns <- ns[order(match(ns[, 1], dat[, 1])), ] # from #407 - in rare event Totals row has been sorted off the bottom, sort to match
- }
- numeric_cols <- which(vapply(ns, is.numeric, logical(1)))
- ns[] <- lapply(ns, format_func)
- ns[] <- lapply(ns, stringr::str_trim)
- }
-
- if (position == "rear") {
- result <- paste_matrices(dat, ns %>%
- dplyr::mutate(
- dplyr::across(dplyr::everything(), purrr::compose(as.character, wrap_parens, standardize_col_width, .dir = "forward"))
- ))
- } else if (position == "front") {
- result <- paste_matrices(ns, dat %>%
- dplyr::mutate(
- dplyr::across(dplyr::everything(), purrr::compose(as.character, wrap_parens, standardize_col_width, .dir = "forward"))
- ))
- }
- attributes(result) <- attrs
-
- if (custom_ns_supplied & rlang::dots_n(...) == 0) {
- dont_adorn <- 1L
- } else if (rlang::dots_n(...) == 0) {
- cols_to_adorn <- numeric_cols
- dont_adorn <- setdiff(1:ncol(dat), cols_to_adorn)
- dont_adorn <- unique(c(1, dont_adorn)) # always don't-append first column
- } else {
- expr <- rlang::expr(c(...))
- cols_to_adorn <- tidyselect::eval_select(expr, data = dat)
- dont_adorn <- setdiff(1:ncol(dat), cols_to_adorn)
- }
-
- for (i in dont_adorn) {
- result[[i]] <- dat[[i]]
- }
- result
- }
-}
-
-### Helper functions called by adorn_ns
-
-# takes two matrices, pastes them together, keeps spacing of the two columns aligned
-paste_matrices <- function(front, rear) {
- front_matrix <- as.matrix(front)
- rear_matrix <- as.matrix(rear)
-
- # paste the results together
- pasted <- paste(front_matrix, " ", rear_matrix, sep = "") %>% # paste the matrices
- matrix(., nrow = nrow(front_matrix), dimnames = dimnames(rear_matrix)) %>% # cast as matrix, then data.frame
- dplyr::as_tibble()
- pasted
-}
-
-
-# Padding function to standardize a column's width by pre-pending whitespace
-standardize_col_width <- function(x) {
- width <- max(nchar(x))
- sprintf(paste0("%", width, "s"), x)
-}
-
-# Wrap a string in parentheses
-wrap_parens <- function(x) {
- paste0("(", x, ")")
-}
diff --git a/R/adorn_pct_formatting.R b/R/adorn_pct_formatting.R
deleted file mode 100644
index d2d6652d..00000000
--- a/R/adorn_pct_formatting.R
+++ /dev/null
@@ -1,104 +0,0 @@
-#' Format a `data.frame` of decimals as percentages.
-#'
-#' @description
-#' Numeric columns get multiplied by 100 and formatted as
-#' percentages according to user specifications. This function defaults to
-#' excluding the first column of the input data.frame, assuming that it contains
-#' a descriptive variable, but this can be overridden by specifying the columns
-#' to adorn in the `...` argument. Non-numeric columns are always excluded.
-#'
-#' The decimal separator character is the result of `getOption("OutDec")`, which
-#' is based on the user's locale. If the default behavior is undesirable,
-#' change this value ahead of calling the function, either by changing locale or
-#' with `options(OutDec = ",")`. This aligns the decimal separator character
-#' with that used in `base::print()`.
-#'
-#' @param dat a data.frame with decimal values, typically the result of a call
-#' to `adorn_percentages` on a `tabyl`. If given a list of data.frames, this
-#' function will apply itself to each data.frame in the list (designed for
-#' 3-way `tabyl` lists).
-#' @param digits how many digits should be displayed after the decimal point?
-#' @param rounding method to use for rounding - either "half to even", the base
-#' R default method, or "half up", where 14.5 rounds up to 15.
-#' @param affix_sign should the % sign be affixed to the end?
-#' @param ... columns to adorn. This takes a tidyselect specification. By
-#' default, all numeric columns (besides the initial column, if numeric) are
-#' adorned, but this allows you to manually specify which columns should be
-#' adorned, for use on a data.frame that does not result from a call to
-#' `tabyl`.
-#' @return a data.frame with formatted percentages
-#' @export
-#' @examples
-#' mtcars %>%
-#' tabyl(am, cyl) %>%
-#' adorn_percentages("col") %>%
-#' adorn_pct_formatting()
-#'
-#' # Control the columns to be adorned with the ... variable selection argument
-#' # If using only the ... argument, you can use empty commas as shorthand
-#' # to supply the default values to the preceding arguments:
-#'
-#' cases <- data.frame(
-#' region = c("East", "West"),
-#' year = 2015,
-#' recovered = c(125, 87),
-#' died = c(13, 12)
-#' )
-#'
-#' cases %>%
-#' adorn_percentages("col", , recovered:died) %>%
-#' adorn_pct_formatting(, , , recovered:died)
-#'
-adorn_pct_formatting <- function(dat, digits = 1, rounding = "half to even", affix_sign = TRUE, ...) {
- # if input is a list, call purrr::map to recursively apply this function to each data.frame
- if (is.list(dat) && !is.data.frame(dat)) {
- purrr::map(dat, adorn_pct_formatting, digits, rounding, affix_sign)
- } else {
- # catch bad inputs
- if (!is.data.frame(dat)) {
- stop("adorn_pct_formatting() must be called on a data.frame or list of data.frames")
- }
- rlang::arg_match0(rounding, c("half to even", "half up"))
-
- original <- dat # used below to record original instances of NA and NaN
-
- numeric_cols <- which(vapply(dat, is.numeric, logical(1)))
- non_numeric_cols <- setdiff(1:ncol(dat), numeric_cols)
- numeric_cols <- setdiff(numeric_cols, 1) # assume 1st column should not be included so remove it from numeric_cols. Moved up to this line so that if only 1st col is numeric, the function errors
-
- if (rlang::dots_n(...) == 0) {
- cols_to_adorn <- numeric_cols
- } else {
- expr <- rlang::expr(c(...))
- cols_to_adorn <- tidyselect::eval_select(expr, data = dat)
- if (any(cols_to_adorn %in% non_numeric_cols)) {
- # don't need to print a message, adorn_rounding will
- cols_to_adorn <- setdiff(cols_to_adorn, non_numeric_cols)
- }
- }
-
-
- if ("one_way" %in% attr(dat, "tabyl_type")) {
- cols_to_adorn <- setdiff(numeric_cols, 2) # so that it works on a one-way tabyl
- }
-
- if (length(cols_to_adorn) == 0) {
- stop("at least one targeted column must be of class numeric")
- }
-
- dat[cols_to_adorn] <- lapply(dat[cols_to_adorn], function(x) x * 100)
- dat <- adorn_rounding(dat, digits = digits, rounding = rounding, ...)
- dat[cols_to_adorn] <- lapply(dat[cols_to_adorn], function(x) {
- format(x,
- nsmall = digits,
- decimal.mark = getOption("OutDec"),
- trim = TRUE
- )
- }) # so that 0% prints as 0.0% or 0.00% etc.
- if (affix_sign) {
- dat[cols_to_adorn] <- lapply(dat[cols_to_adorn], function(x) paste0(x, "%"))
- }
- dat[cols_to_adorn][is.na(original[cols_to_adorn])] <- "-" # NA and NaN values in the original should be simply "-" for printing of results
- dat
- }
-}
diff --git a/R/adorn_percentages.R b/R/adorn_percentages.R
deleted file mode 100644
index 6536325f..00000000
--- a/R/adorn_percentages.R
+++ /dev/null
@@ -1,119 +0,0 @@
-#' Convert a data.frame of counts to percentages.
-#'
-#' This function defaults to excluding the first column of the input data.frame,
-#' assuming that it contains a descriptive variable, but this can be overridden
-#' by specifying the columns to adorn in the `...` argument.
-#'
-#' @param dat A `tabyl` or other data.frame with a tabyl-like layout.
-#' If given a list of data.frames, this function will apply itself to each
-#' `data.frame` in the list (designed for 3-way `tabyl` lists).
-#' @param denominator The direction to use for calculating percentages.
-#' One of "row", "col", or "all".
-#' @param na.rm should missing values (including `NaN`) be omitted from the calculations?
-#' @param ... columns to adorn. This takes a <[`tidy-select`][dplyr::dplyr_tidy_select]>
-#' specification. By default, all numeric columns (besides the initial column, if numeric)
-#' are adorned, but this allows you to manually specify which columns should
-#' be adorned, for use on a `data.frame` that does not result from a call to [tabyl()].
-#'
-#' @return A `data.frame` of percentages, expressed as numeric values between 0 and 1.
-#' @export
-#' @examples
-#'
-#' mtcars %>%
-#' tabyl(am, cyl) %>%
-#' adorn_percentages("col")
-#'
-#' # calculates correctly even with totals column and/or row:
-#' mtcars %>%
-#' tabyl(am, cyl) %>%
-#' adorn_totals("row") %>%
-#' adorn_percentages()
-#'
-#' # Control the columns to be adorned with the ... variable selection argument
-#' # If using only the ... argument, you can use empty commas as shorthand
-#' # to supply the default values to the preceding arguments:
-#'
-#' cases <- data.frame(
-#' region = c("East", "West"),
-#' year = 2015,
-#' recovered = c(125, 87),
-#' died = c(13, 12)
-#' )
-#'
-#' cases %>%
-#' adorn_percentages(, , recovered:died)
-adorn_percentages <- function(dat, denominator = "row", na.rm = TRUE, ...) {
- # if input is a list, call purrr::map to recursively apply this function to each data.frame
- if (is.list(dat) && !is.data.frame(dat)) {
- purrr::map(dat, adorn_percentages, denominator, na.rm, ...)
- } else {
- # catch bad inputs
- if (!is.data.frame(dat)) {
- stop("adorn_percentages() must be called on a data.frame or list of data.frames")
- }
- rlang::arg_match0(denominator, c("row", "col", "all"))
-
- dat <- as_tabyl(dat)
-
- numeric_cols <- which(vapply(dat, is.numeric, logical(1)))
- non_numeric_cols <- setdiff(1:ncol(dat), numeric_cols)
- numeric_cols <- setdiff(numeric_cols, 1) # assume 1st column should not be included so remove it from numeric_cols. Moved up to this line so that if only 1st col is numeric, the function errors
- explicitly_exempt_totals <- FALSE
-
- if (rlang::dots_n(...) == 0) {
- cols_to_tally <- numeric_cols
- } else {
- expr <- rlang::expr(c(...))
- cols_to_tally <- tidyselect::eval_select(expr, data = dat)
- explicitly_exempt_totals <- !(ncol(dat) %in% cols_to_tally) # if not present, it's b/c user explicitly exempted it
- if (any(cols_to_tally %in% non_numeric_cols)) {
- message("At least one non-numeric column was specified. All non-numeric columns will be removed from percentage calculations.")
- cols_to_tally <- setdiff(cols_to_tally, non_numeric_cols)
- }
- }
-
- if ("col" %in% attr(dat, "totals")) {
- # if there's a totals col, don't use it to calculate the %s
- cols_to_tally <- setdiff(cols_to_tally, ncol(dat))
- }
-
- if (denominator == "row") {
- # if row-wise percentages and a totals column, need to exempt totals col and make it all 1s
- if ("col" %in% attr(dat, "totals") & !explicitly_exempt_totals) {
- dat[[ncol(dat)]] <- rep(1, nrow(dat))
- }
- row_sum <- rowSums(dat[cols_to_tally], na.rm = na.rm)
- dat[, cols_to_tally] <- dat[cols_to_tally] / row_sum
- } else if (denominator == "col") {
- # if col-wise percentages and a row column, need to exempt totals row and make it all 1s
- if ("row" %in% attr(dat, "totals")) {
- col_sum <- colSums(dat[-nrow(dat), ][cols_to_tally], na.rm = na.rm)
- } else {
- col_sum <- colSums(dat[cols_to_tally], na.rm = na.rm)
- }
- # add totals col back to be tallied, #357
- if ("col" %in% attr(dat, "totals") & !explicitly_exempt_totals) {
- cols_to_tally <- c(cols_to_tally, ncol(dat))
- if ("row" %in% attr(dat, "totals")) {
- col_sum <- c(col_sum, sum(dat[-nrow(dat), ncol(dat)]))
- } else {
- col_sum <- c(col_sum, sum(dat[, ncol(dat)]))
- }
- }
- dat[cols_to_tally] <- sweep(dat[cols_to_tally], 2, col_sum, `/`) # from http://stackoverflow.com/questions/9447801/dividing-columns-by-colsums-in-r
- } else if (denominator == "all") {
- # if all-wise percentages, need to exempt any totals col or row
- if ("row" %in% attr(dat, "totals")) {
- complete_n <- sum(dat[-nrow(dat), cols_to_tally], na.rm = TRUE)
- } else {
- complete_n <- sum(dat[, cols_to_tally], na.rm = TRUE)
- }
- # add totals col back to be tallied, #357
- if ("col" %in% attr(dat, "totals") & !explicitly_exempt_totals) {
- cols_to_tally <- c(cols_to_tally, ncol(dat))
- }
- dat[cols_to_tally] <- dat[cols_to_tally] / complete_n
- }
- dat
- }
-}
diff --git a/R/adorn_rounding.R b/R/adorn_rounding.R
deleted file mode 100644
index 60059ed5..00000000
--- a/R/adorn_rounding.R
+++ /dev/null
@@ -1,92 +0,0 @@
-#' Round the numeric columns in a data.frame.
-#'
-#' @description
-#' Can run on any `data.frame` with at least one numeric column.
-#' This function defaults to excluding the first column of the input data.frame,
-#' assuming that it contains a descriptive variable, but this can be overridden by
-#' specifying the columns to round in the `...` argument.
-#'
-#' If you're formatting percentages, e.g., the result of [adorn_percentages()],
-#' use [adorn_pct_formatting()] instead. This is a more flexible variant for ad-hoc usage.
-#' Compared to `adorn_pct_formatting()`, it does not multiply by 100 or pad the
-#' numbers with spaces for alignment in the results `data.frame`.
-#' This function retains the class of numeric input columns.
-#'
-#' @param dat A `tabyl` or other `data.frame` with similar layout.
-#' If given a list of data.frames, this function will apply itself to each
-#' `data.frame` in the list (designed for 3-way `tabyl` lists).
-#' @param digits How many digits should be displayed after the decimal point?
-#' @param rounding Method to use for rounding - either "half to even"
-#' (the base R default method), or "half up", where 14.5 rounds up to 15.
-#' @param ... Columns to adorn. This takes a tidyselect specification.
-#' By default, all numeric columns (besides the initial column, if numeric)
-#' are adorned, but this allows you to manually specify which columns should
-#' be adorned, for use on a data.frame that does not result from a call to `tabyl`.
-#'
-#' @return The `data.frame` with rounded numeric columns.
-#' @export
-#' @examples
-#'
-#' mtcars %>%
-#' tabyl(am, cyl) %>%
-#' adorn_percentages() %>%
-#' adorn_rounding(digits = 2, rounding = "half up")
-#'
-#' # tolerates non-numeric columns:
-#' library(dplyr)
-#' mtcars %>%
-#' tabyl(am, cyl) %>%
-#' adorn_percentages("all") %>%
-#' mutate(dummy = "a") %>%
-#' adorn_rounding()
-#'
-#' # Control the columns to be adorned with the ... variable selection argument
-#' # If using only the ... argument, you can use empty commas as shorthand
-#' # to supply the default values to the preceding arguments:
-#' cases <- data.frame(
-#' region = c("East", "West"),
-#' year = 2015,
-#' recovered = c(125, 87),
-#' died = c(13, 12)
-#' )
-#'
-#' cases %>%
-#' adorn_percentages(, , ends_with("ed")) %>%
-#' adorn_rounding(, , all_of(c("recovered", "died")))
-adorn_rounding <- function(dat, digits = 1, rounding = "half to even", ...) {
- # if input is a list, call purrr::map to recursively apply this function to each data.frame
- if (is.list(dat) && !is.data.frame(dat)) {
- purrr::map(dat, adorn_rounding, digits, rounding, ...)
- } else {
- # catch bad inputs
- if (!is.data.frame(dat)) {
- stop("adorn_rounding() must be called on a data.frame or list of data.frames")
- }
- if (!rounding %in% c("half to even", "half up")) {
- stop("'rounding' must be one of 'half to even' or 'half up'")
- }
- numeric_cols <- which(vapply(dat, is.numeric, logical(1)))
- non_numeric_cols <- setdiff(1:ncol(dat), numeric_cols)
- # assume 1st column should not be included so remove it from numeric_cols.
- # Moved up to this line so that if only 1st col is numeric, the function errors
- numeric_cols <- setdiff(numeric_cols, 1)
-
- if (rlang::dots_n(...) == 0) {
- cols_to_round <- numeric_cols
- } else {
- expr <- rlang::expr(c(...))
- cols_to_round <- tidyselect::eval_select(expr, data = dat)
- if (any(cols_to_round %in% non_numeric_cols)) {
- message("At least one non-numeric column was specified and will not be modified.")
- cols_to_round <- setdiff(cols_to_round, non_numeric_cols)
- }
- }
-
- if (rounding == "half to even") {
- dat[cols_to_round] <- lapply(dat[cols_to_round], function(x) round(x, digits = digits))
- } else {
- dat[cols_to_round] <- lapply(dat[cols_to_round], function(x) round_half_up(x, digits = digits))
- }
- dat
- }
-}
diff --git a/R/adorn_title.R b/R/adorn_title.R
deleted file mode 100644
index fa5a5e8a..00000000
--- a/R/adorn_title.R
+++ /dev/null
@@ -1,113 +0,0 @@
-#' Add column name to the top of a two-way tabyl.
-#'
-#' This function adds the column variable name to the top of a `tabyl` for a
-#' complete display of information. This makes the tabyl prettier, but renders
-#' the `data.frame` less useful for further manipulation.
-#'
-#' The `placement` argument indicates whether the column name should be added to
-#' the `top` of the tabyl in an otherwise-empty row `"top"` or appended to the
-#' already-present row name variable (`"combined"`). The formatting in the `"top"`
-#' option has the look of base R's `table()`; it also wipes out the other column
-#' names, making it hard to further use the `data.frame` besides formatting it for reporting.
-#' The `"combined"` option is more conservative in this regard.
-#'
-#' @param dat A `data.frame` of class `tabyl` or other `data.frame` with a tabyl-like layout.
-#' If given a list of data.frames, this function will apply itself to each `data.frame`
-#' in the list (designed for 3-way `tabyl` lists).
-#' @param placement The title placement, one of `"top"`, or `"combined"`.
-#' See **Details** for more information.
-#' @param row_name (optional) default behavior is to pull the row name from the
-#' attributes of the input `tabyl` object. If you wish to override that text,
-#' or if your input is not a `tabyl`, supply a string here.
-#' @param col_name (optional) default behavior is to pull the column_name from
-#' the attributes of the input `tabyl` object. If you wish to override that text,
-#' or if your input is not a `tabyl`, supply a string here.
-#' @return The input `tabyl`, augmented with the column title. Non-tabyl inputs
-#' that are of class `tbl_df` are downgraded to basic data.frames so that the
-#' title row prints correctly.
-#'
-#' @export
-#' @examples
-#'
-#' mtcars %>%
-#' tabyl(am, cyl) %>%
-#' adorn_title(placement = "top")
-#'
-#' # Adding a title to a non-tabyl
-#' library(tidyr)
-#' library(dplyr)
-#' mtcars %>%
-#' group_by(gear, am) %>%
-#' summarise(avg_mpg = mean(mpg), .groups = "drop") %>%
-#' pivot_wider(names_from = am, values_from = avg_mpg) %>%
-#' adorn_rounding() %>%
-#' adorn_title("top", row_name = "Gears", col_name = "Cylinders")
-adorn_title <- function(dat, placement = "top", row_name, col_name) {
- # if input is a list, call purrr::map to recursively apply this function to each data.frame
- if (is.list(dat) && !is.data.frame(dat)) {
- purrr::map(dat, adorn_title, placement, row_name, col_name)
- } else {
- if (!is.data.frame(dat)) {
- stop("\"dat\" must be a data.frame")
- }
-
- rlang::arg_match0(placement, c("top", "combined"))
-
- if (inherits(dat, "tabyl")) {
- if (attr(dat, "tabyl_type") == "one_way") {
- warning(
- "adorn_title is meant for two-way tabyls, calling it on a one-way tabyl may not yield a meaningful result"
- )
- }
- }
- if (missing(col_name)) {
- if (!inherits(dat, "tabyl")) {
- stop("When input is not a data.frame of class tabyl, a value must be specified for the col_name argument.")
- }
- col_var <- attr(dat, "var_names")$col
- } else {
- if (!is.character(col_name)) {
- stop("col_name must be a string")
- }
- col_var <- col_name
- }
-
- if (!missing(row_name)) {
- if (!is.character(row_name)) {
- stop("row_name must be a string")
- }
- names(dat)[1] <- row_name
- row_var <- row_name
- } else {
- if (inherits(dat, "tabyl")) {
- row_var <- attr(dat, "var_names")$row
- } else {
- # for non-tabyl input, if no row_name supplied, use first existing name
- row_var <- names(dat)[1]
- }
- }
-
-
- if (placement == "top") {
- # to handle factors, problematic in first column and at bind_rows.
- dat[, ] <- lapply(dat[, ], as.character)
- # Can't use mutate_all b/c it strips attributes
- top <- dat[1, ]
-
- top[1, ] <- as.list(names(top))
-
- out <- dplyr::bind_rows(top, dat)
- out <- stats::setNames(out, c("", col_var, rep("", ncol(out) - 2)))
- }
- if (placement == "combined") {
- out <- dat
- names(out)[1] <- paste(row_var, col_var, sep = "/")
- }
- # "top" text doesn't print if input (and thus the output) is a tibble
- if (inherits(out, "tbl_df")) {
- # but this prints row numbers, so don't apply to non-tbl_dfs like tabyls
- out <- as.data.frame(out)
- }
- out
- }
-}
diff --git a/R/adorn_totals.R b/R/adorn_totals.R
deleted file mode 100644
index b4db52f3..00000000
--- a/R/adorn_totals.R
+++ /dev/null
@@ -1,174 +0,0 @@
-#' Append a totals row and/or column to a data.frame
-#'
-#' This function defaults to excluding the first column of the input data.frame,
-#' assuming that it contains a descriptive variable, but this can be overridden
-#' by specifying the columns to be totaled in the `...` argument. Non-numeric
-#' columns are converted to character class and have a user-specified fill character
-#' inserted in the totals row.
-#'
-#' @param dat An input `data.frame` with at least one numeric column. If given a
-#' list of data.frames, this function will apply itself to each `data.frame`
-#' in the list (designed for 3-way `tabyl` lists).
-#' @param where One of "row", "col", or `c("row", "col")`
-#' @param fill If there are non-numeric columns, what should fill the bottom row
-#' of those columns? If a string, relevant columns will be coerced to character.
-#' If `NA` then column types are preserved.
-#' @param na.rm Should missing values (including `NaN`) be omitted from the calculations?
-#' @param name Name of the totals row and/or column. If both are created, and
-#' `name` is a single string, that name is applied to both. If both are created
-#' and `name` is a vector of length 2, the first element of the vector will be
-#' used as the row name (in column 1), and the second element will be used as the
-#' totals column name. Defaults to "Total".
-#' @param ... Columns to total. This takes a tidyselect specification. By default,
-#' all numeric columns (besides the initial column, if numeric) are included in
-#' the totals, but this allows you to manually specify which columns should be
-#' included, for use on a data.frame that does not result from a call to `tabyl`.
-#' @return A `data.frame` augmented with a totals row, column, or both.
-#' The `data.frame` is now also of class `tabyl` and stores information about
-#' the attached totals and underlying data in the tabyl attributes.
-#' @export
-#' @examples
-#' mtcars %>%
-#' tabyl(am, cyl) %>%
-#' adorn_totals()
-adorn_totals <- function(dat, where = "row", fill = "-", na.rm = TRUE, name = "Total", ...) {
- if ("both" %in% where) {
- where <- c("row", "col")
- }
- # if input is a list, call purrr::map to recursively apply this function to each data.frame
- if (is.list(dat) && !is.data.frame(dat)) {
- purrr::map(dat, adorn_totals, where, fill, na.rm, name)
- } else {
- if (!is.data.frame(dat)) {
- stop("adorn_totals() must be called on a data.frame or list of data.frames")
- }
-
- numeric_cols <- which(vapply(dat, is.numeric, logical(1)))
- non_numeric_cols <- setdiff(1:ncol(dat), numeric_cols)
-
- if (rlang::dots_n(...) == 0) {
- # by default 1st column is not totaled so remove it from numeric_cols and add to non_numeric_cols
- numeric_cols <- setdiff(numeric_cols, 1)
- non_numeric_cols <- unique(c(1, non_numeric_cols))
- cols_to_total <- numeric_cols
- } else {
- expr <- rlang::expr(c(...))
- cols_to_total <- tidyselect::eval_select(expr, data = dat)
- if (any(cols_to_total %in% non_numeric_cols)) {
- cols_to_total <- setdiff(cols_to_total, non_numeric_cols)
- }
- }
-
- if (length(cols_to_total) == 0) {
- stop("at least one targeted column must be of class numeric. Control target variables with the ... argument. adorn_totals should be called before other adorn_ functions.")
- }
-
- if (sum(where %in% c("row", "col")) != length(where)) {
- stop("\"where\" must be one of \"row\", \"col\", or c(\"row\", \"col\")")
- }
-
- if (length(name) == 1) name <- rep(name, 2)
-
-
- # grouped_df causes problems, #97
- if (inherits(dat, "grouped_df")) {
- dat <- dplyr::ungroup(dat)
- }
-
- dat <- as_tabyl(dat) # even a tabyl needs to be recast as a tabyl to reset the core in case it's been sorted
-
- # set totals attribute
- if (sum(where %in% attr(dat, "totals")) > 0) { # if either of the values of "where" are already in totals attribute
- stop("trying to re-add a totals dimension that is already been added")
- } else if (length(attr(dat, "totals")) == 1) {
- # if totals row OR col has already been adorned, append new axis to the current attribute
- attr(dat, "totals") <- c(attr(dat, "totals"), where)
- } else {
- attr(dat, "totals") <- where
- }
-
- if ("row" %in% where) {
- # capture factor levels if relevant, #494
- factor_input <- is.factor(dat[[1]])
- if (factor_input) {
- col1_backup <- dat[[1]][1]
- }
- # creates the totals row to be appended
- col_sum <- function(a_col, na_rm = na.rm) {
- # can't do this with if_else because it doesn't like the sum() of a character vector,
- # even if that clause is not reached
- if (is.numeric(a_col)) {
- sum(a_col, na.rm = na_rm)
- } else {
- if (!is.character(fill)) { # if fill isn't a character string, use NA consistent with data types
- switch(typeof(a_col),
- "character" = NA_character_,
- "integer" = NA_integer_,
- "double" = if (inherits(a_col, "Date") || inherits(a_col, "POSIXt")) {
- as.Date(NA_real_, origin = "1970-01-01")
- } else {
- NA_real_
- },
- "complex" = NA_complex_,
- NA
- )
- } else {
- fill # otherwise just use the string provided
- }
- }
- }
-
- if (is.character(fill)) { # if fill is a string, keep original implementation
- col_totals <- purrr::map_df(dat, col_sum)
- not_totaled_cols <- setdiff(1:length(col_totals), cols_to_total)
- col_totals[not_totaled_cols] <- fill # reset numeric columns that weren't to be totaled
- dat[not_totaled_cols] <- lapply(dat[not_totaled_cols], as.character) # type compatibility for bind_rows
- } else {
- cols_idx <- seq_along(dat) # get col indexes
- names(cols_idx) <- names(dat) # name them using dat names
-
- col_totals <- purrr::map_df(cols_idx, function(i) {
- if (is.numeric(dat[[i]]) && !i %in% cols_to_total) { # check if numeric and not to be totaled
- switch(typeof(dat[[i]]), # and set to NA
- "integer" = NA_integer_,
- "double" = NA_real_,
- NA
- )
- } else { # otherwise run col_sum on the rest
- col_sum(dat[[i]])
- }
- })
-
- if (!is.character(dat[[1]]) && !1 %in% cols_to_total) {
- # convert first col to character so that name can be appended
- dat[[1]] <- as.character(dat[[1]])
- col_totals[[1]] <- as.character(col_totals[[1]])
- }
- }
-
- if (!1 %in% cols_to_total) { # give users the option to total the first column?? Up to them I guess
- col_totals[1, 1] <- name[1] # replace first column value with name argument
- } else {
- message("Because the first column was specified to be totaled, it does not contain the label 'Total' (or user-specified name) in the totals row")
- }
- dat[(nrow(dat) + 1), ] <- col_totals[1, ] # insert totals_col as last row in dat
- if (factor_input) { # restore factor/ordered info, #494
- dat[[1]] <- factor(dat[[1]],
- levels = c(setdiff(levels(col1_backup), name[1]), name[1]), # don't add if level is present
- ordered = is.ordered(col1_backup)
- )
- }
- }
-
- if ("col" %in% where) {
- # Add totals col
- row_totals <- dat %>%
- dplyr::select(dplyr::all_of(cols_to_total) & dplyr::where(is.numeric)) %>%
- dplyr::transmute(Total = rowSums(., na.rm = na.rm))
-
- dat[[name[2]]] <- row_totals$Total
- }
-
- dat
- }
-}
diff --git a/R/as_and_untabyl.R b/R/as_and_untabyl.R
deleted file mode 100644
index 235b3758..00000000
--- a/R/as_and_untabyl.R
+++ /dev/null
@@ -1,117 +0,0 @@
-#' Add `tabyl` attributes to a data.frame
-#'
-#' @description
-#' A `tabyl` is a `data.frame` containing counts of a variable or
-#' co-occurrences of two variables (a.k.a., a contingency table or crosstab).
-#' This specialized kind of data.frame has attributes that enable `adorn_`
-#' functions to be called for precise formatting and presentation of results.
-#' E.g., display results as a mix of percentages, Ns, add totals rows or
-#' columns, rounding options, in the style of Microsoft Excel PivotTable.
-#'
-#' A `tabyl` can be the result of a call to `janitor::tabyl()`, in which case
-#' these attributes are added automatically. This function adds `tabyl` class
-#' attributes to a data.frame that isn't the result of a call to `tabyl` but
-#' meets the requirements of a two-way tabyl: 1) First column contains values of
-#' variable 1 2) Column names 2:n are the values of variable 2 3) Numeric values
-#' in columns 2:n are counts of the co-occurrences of the two variables.*
-#'
-#' * = this is the ideal form of a `tabyl`, but janitor's `adorn_` functions tolerate
-#' and ignore non-numeric columns in positions 2:n.
-#'
-#' For instance, the result of [dplyr::count()] followed by [tidyr::pivot_wider()]
-#' can be treated as a `tabyl`.
-#'
-#' The result of calling [tabyl()] on a single variable is a special class of
-#' one-way tabyl; this function only pertains to the two-way tabyl.
-#'
-#' @param dat a data.frame with variable values in the first column and numeric
-#' values in all other columns.
-#' @param axes is this a two_way tabyl or a one_way tabyl? If this function is
-#' being called by a user, this should probably be "2". One-way tabyls are
-#' created by `tabyl` but are a special case.
-#' @param row_var_name (optional) the name of the variable in the row dimension;
-#' used by `adorn_title()`.
-#' @param col_var_name (optional) the name of the variable in the column
-#' dimension; used by `adorn_title()`.
-#' @return Returns the same data.frame, but with the additional class of "tabyl"
-#' and the attribute "core".
-#' @export
-#' @examples
-#' as_tabyl(mtcars)
-#'
-as_tabyl <- function(dat, axes = 2, row_var_name = NULL, col_var_name = NULL) {
- if (!axes %in% 1:2) {
- stop("axes must be either 1 or 2")
- }
-
- # check whether input meets requirements
- if (!is.data.frame(dat)) {
- stop("input must be a data.frame")
- }
- if (sum(unlist(lapply(dat, is.numeric))[-1]) == 0) {
- stop("at least one one of columns 2:n must be of class numeric")
- }
-
- # assign core attribute and classes
- if (inherits(dat, "tabyl")) {
- # if already a tabyl, may have totals row.
- # Safest play is to simply reorder the core rows to match the dat rows
- attr(dat, "core") <- attr(dat, "core")[order(match(
- attr(dat, "core")[, 1],
- dat[, 1]
- )), ]
- row.names(attr(dat, "core")) <- 1:nrow(attr(dat, "core")) # if they're sorted in the prior step above, this resets
- } else {
- attr(dat, "core") <- as.data.frame(dat) # core goes first so dat does not yet have attributes attached to it
- }
-
- attr(dat, "tabyl_type") <- ifelse(
- !is.null(attr(dat, "tabyl_type")),
- attr(dat, "tabyl_type"), # if a one_way tabyl has as_tabyl called on it, it should stay a one_way #523
- dplyr::case_when(
- axes == 1 ~ "one_way",
- axes == 2 ~ "two_way"
- )
- )
- class(dat) <- c("tabyl", setdiff(class(dat), "tabyl"))
-
- if (!missing(row_var_name) | !missing(col_var_name)) {
- if (axes != 2) {
- stop("variable names are only meaningful for two-way tabyls")
- }
- attr(dat, "var_names") <- list(row = row_var_name, col = col_var_name)
- }
-
- dat
-}
-
-#' Remove `tabyl` attributes from a data.frame.
-#'
-#' Strips away all `tabyl`-related attributes from a data.frame.
-#'
-#' @param dat a `data.frame` of class `tabyl`.
-#' @return the same `data.frame`, but without the `tabyl` class and attributes.
-#' @export
-#' @examples
-#'
-#' mtcars %>%
-#' tabyl(am) %>%
-#' untabyl() %>%
-#' attributes() # tabyl-specific attributes are gone
-untabyl <- function(dat) {
- # if input is a list, call purrr::map to recursively apply this function to each data.frame
- if (is.list(dat) && !is.data.frame(dat)) {
- purrr::map(dat, untabyl)
- } else {
- if (!inherits(dat, "tabyl")) {
- warning("untabyl() called on a non-tabyl")
- }
- class(dat) <- class(dat)[!class(dat) %in% "tabyl"]
- attr(dat, "core") <- NULL
- # These attributes may not exist, but simpler to declare them NULL regardless than to check to see if they exist:
- attr(dat, "totals") <- NULL
- attr(dat, "tabyl_type") <- NULL # may not exist, but simpler to declare it NULL regardless than to check to see if it exists
- attr(dat, "var_names") <- NULL # may not exist, but simpler to declare it NULL regardless than to check to see if it exists
- dat
- }
-}
diff --git a/R/get_level_groups.R b/R/get_level_groups.R
deleted file mode 100644
index 0f3e6a37..00000000
--- a/R/get_level_groups.R
+++ /dev/null
@@ -1,28 +0,0 @@
-# Return groupings for a factor variable in the top_levels() function
-
-get_level_groups <- function(vec, n, num_levels_in_var) {
- top_n_lvls <- paste(levels(vec)[1:n], collapse = ", ")
- bot_n_lvls <- paste(levels(vec)[(num_levels_in_var - n + 1):num_levels_in_var], collapse = ", ")
-
- # Identify middle combinations, if needed
- if (num_levels_in_var > 2 * n) {
- mid_lvls <- paste(levels(vec)[(n + 1):(num_levels_in_var - n)], collapse = ", ")
- } else {
- mid_lvls <- NA
- }
-
- # Truncate strings if needed
- ## Middle groups are variable size, so displaying the N there is useful;
- ## Top/Bottom are user-specified size, so just truncate the labels
- if (!is.na(mid_lvls) & nchar(mid_lvls) > 30) {
- mid_lvls <- paste0("<<< Middle Group (", num_levels_in_var - 2 * n, " categories) >>>")
- }
- if (nchar(top_n_lvls) > 30) {
- top_n_lvls <- paste0(substr(top_n_lvls, 1, 27), "...")
- }
- if (nchar(bot_n_lvls) > 30) {
- bot_n_lvls <- paste0(substr(bot_n_lvls, 1, 27), "...")
- }
-
- list(top = top_n_lvls, mid = mid_lvls, bot = bot_n_lvls)
-}
diff --git a/R/janitor_deprecated.R b/R/janitor_deprecated.R
index 946a2924..6a558ed6 100644
--- a/R/janitor_deprecated.R
+++ b/R/janitor_deprecated.R
@@ -2,12 +2,8 @@
#'
#' These functions have already become defunct or may be defunct as soon as the next release.
#'
-#' * [adorn_crosstab()] -> `adorn_`
-#' * [crosstab()] -> [tabyl()]
#' * [use_first_valid_of()] -> [dplyr::coalesce()]
#' * [convert_to_NA()] -> [dplyr::na_if()]
-#' * [add_totals_col()] -> [`adorn_totals(where = "col")`][adorn_totals()]
-#' * [add_totals_row()] -> [adorn_totals()]
#' * [remove_empty_rows()] -> [`remove_empty("rows")`][remove_empty()]
#' * [remove_empty_cols()] -> [`remove_empty("cols")`][remove_empty()]
#'
@@ -16,88 +12,6 @@
# EXCLUDE COVERAGE START
NULL
-
-
-
-#' @title Generate a crosstabulation of two vectors.
-#' @param ... arguments
-#' @keywords internal
-#' @description
-#' This function is deprecated, use [`tabyl(dat, var1, var2)`][tabyl()] instead.
-#' @export
-
-crosstab <- function(...) {
- lifecycle::deprecate_stop(
- when = "2.0.0",
- what = "janitor::crosstab()",
- with = "tabyl()",
- details = "See the guide to tabyl(): https://cran.r-project.org/web/packages/janitor/vignettes/tabyls.html"
- )
-}
-
-#' @title Add presentation formatting to a crosstabulation table.
-#' @description
-#' This function is deprecated, use [tabyl()] with the `adorn_` family of functions instead.
-#' @param dat a data.frame with row names in the first column and numeric values in all other columns. Usually the piped-in result of a call to `crosstab` that included the argument `percent = "none"`.
-#' @param denom the denominator to use for calculating percentages. One of "row", "col", or "all".
-#' @param show_n should counts be displayed alongside the percentages?
-#' @param digits how many digits should be displayed after the decimal point?
-#' @param show_totals display a totals summary? Will be a row, column, or both depending on the value of `denom`.
-#' @param rounding method to use for truncating percentages - either "half to even", the base R default method, or "half up", where 14.5 rounds up to 15.
-#' @return Returns a data.frame.
-#' @keywords internal
-#' @export
-
-adorn_crosstab <- function(dat, denom = "row", show_n = TRUE, digits = 1, show_totals = FALSE, rounding = "half to even") {
- lifecycle::deprecate_stop(
- when = "2.0.0",
- what = "janitor::adorn_crosstab()",
- with = "tabyl()",
- details = "See the adorn_* functions for formatting a tabyl: https://cran.r-project.org/web/packages/janitor/vignettes/tabyls.html"
- )
-}
-
-#' @title Append a totals row to a data.frame.
-#'
-#' @description
-#' This function is deprecated, use [adorn_totals()] instead.
-#'
-#' @param dat an input data.frame with at least one numeric column.
-#' @param fill if there are more than one non-numeric columns, what string should fill the bottom row of those columns?
-#' @param na.rm should missing values (including NaN) be omitted from the calculations?
-#' @return Returns a data.frame with a totals row, consisting of "Total" in the first column and column sums in the others.
-#' @keywords internal
-#' @export
-add_totals_row <- function(dat, fill = "-", na.rm = TRUE) {
- lifecycle::deprecate_stop(
- when = "2.0.0",
- what = "janitor::add_totals_row()",
- with = "adorn_totals()",
- details = "See the adorn_* functions for formatting a tabyl or data.frame: https://cran.r-project.org/web/packages/janitor/vignettes/tabyls.html"
- )
-}
-
-#' @title Append a totals column to a data.frame.
-#'
-#' @description
-#' This function is deprecated, use [`adorn_totals(where = "col")`][adorn_totals()] instead.
-#'
-#' @param dat an input data.frame with at least one numeric column.
-#' @param na.rm should missing values (including NaN) be omitted from the calculations?
-#' @keywords internal
-#' @return Returns a data.frame with a totals column containing row-wise sums.
-#' @export
-
-add_totals_col <- function(dat, na.rm = TRUE) {
- lifecycle::deprecate_stop(
- when = "2.0.0",
- what = "janitor::add_totals_cols()",
- with = "adorn_totals()",
- details = "See the adorn_* functions for formatting a tabyl or data.frame: https://cran.r-project.org/web/packages/janitor/vignettes/tabyls.html"
- )
-}
-
-
#' @title Returns first non-`NA` value from a set of vectors.
#'
#' @description
diff --git a/R/print_tabyl.R b/R/print_tabyl.R
deleted file mode 100644
index b971bc40..00000000
--- a/R/print_tabyl.R
+++ /dev/null
@@ -1,4 +0,0 @@
-#' @export
-print.tabyl <- function(x, ...) {
- print.data.frame(x, row.names = FALSE)
-}
diff --git a/R/statistical_tests.R b/R/statistical_tests.R
deleted file mode 100644
index b0fe5b51..00000000
--- a/R/statistical_tests.R
+++ /dev/null
@@ -1,205 +0,0 @@
-#' Apply `stats::chisq.test()` to a two-way tabyl
-#'
-#' @description
-#' This generic function overrides `stats::chisq.test`. If the passed table
-#' is a two-way tabyl, it runs it through janitor::chisq.test.tabyl, otherwise
-#' it just calls `stats::chisq.test()`.
-#'
-#' @param x a two-way tabyl, a numeric vector or a factor
-#' @param ... other parameters passed to [stats::chisq.test()]
-#' @return The result is the same as the one of `stats::chisq.test()`.
-#' If `tabyl_results` is `TRUE`, the returned tables `observed`, `expected`,
-#' `residuals` and `stdres` are converted to tabyls.
-#'
-#' @examples
-#' tab <- tabyl(mtcars, gear, cyl)
-#' chisq.test(tab)
-#' chisq.test(tab)$residuals
-#'
-#' @export
-
-chisq.test <- function(x, ...) {
- UseMethod("chisq.test")
-}
-
-
-#' @rdname chisq.test
-#' @method chisq.test default
-#' @param y if x is a vector, must be another vector or factor of the same length
-#' @export
-
-chisq.test.default <- function(x, y = NULL, ...) {
- # keep track of object names to keep `data.name` attribute
- if (!is.null(y)) {
- dname_x <- deparse(substitute(x))
- dname_y <- deparse(substitute(y))
- dname <- paste(dname_x, "and", dname_y)
- } else {
- dname <- deparse(substitute(x))
- }
-
- result <- stats::chisq.test(x, y, ...)
-
- # Replace object name in result for strict equality with stats::chisq.test
- result$data.name <- dname
- if (!is.null(y)) {
- names(attr(result$observed, "dimnames")) <- c(dname_x, dname_y)
- names(attr(result$expected, "dimnames")) <- c(dname_x, dname_y)
- names(attr(result$residuals, "dimnames")) <- c(dname_x, dname_y)
- names(attr(result$stdres, "dimnames")) <- c(dname_x, dname_y)
- }
-
- result
-}
-
-
-#' @rdname chisq.test
-#' @method chisq.test tabyl
-#' @param tabyl_results If `TRUE` and `x` is a tabyl object,
-#' also return `observed`, `expected`, `residuals` and `stdres` as tabyl.
-#' @export
-
-chisq.test.tabyl <- function(x, tabyl_results = TRUE, ...) {
- # keep track of object name to keep `data.name` attribute
- dname <- deparse(substitute(x))
-
- # check if table is a two-way tabyl
- if (!(inherits(x, "tabyl") && attr(x, "tabyl_type") == "two_way")) {
- stop("chisq.test.tabyl() must be applied to a two-way tabyl object")
- }
-
- # check for and remove totals row / column, if present
- if (!is.null(attr(x, "totals"))) {
- if ("row" %in% attr(x, "totals")) {
- x <- x[-nrow(x), ]
- }
- if ("col" %in% attr(x, "totals")) {
- # this causes the var_names attribute to become NULL, not sure why
- x[ncol(x)] <- NULL
- }
- warning("janitor::chisq.test.tabyl() detected a totals row and/or column. The totals were removed from the tabyl before the test was run.
- If you intend to include the totals row and/or column in the test, first call untabyl() on the data.frame, then proceed from there.")
- }
-
- rownames(x) <- x[[1]]
-
- result <- x %>%
- dplyr::select(-1) %>%
- as.matrix() %>%
- as.table() %>%
- stats::chisq.test(...)
-
- # Replace values and attributes for strict object equality
- result$data.name <- dname
- names(attr(result$observed, "dimnames")) <- c("", "")
- names(attr(result$expected, "dimnames")) <- c("", "")
- names(attr(result$residuals, "dimnames")) <- c("", "")
- names(attr(result$stdres, "dimnames")) <- c("", "")
-
- # Return results tables as tabyl
- if (tabyl_results) {
- # Keep track of row names column name and var_names attributes
- rownames_column <- names(x)[1]
- var_names <- attr(x, "var_names")
-
- # For each returned table, convert it to a two-way tabyl
- tables <- c("observed", "expected", "residuals", "stdres")
- for (table in tables) {
- tab <- result[[table]]
- ttab <- as.data.frame.matrix(tab)
- ttab[[rownames_column]] <- rownames(tab)
- ttab <- ttab %>% dplyr::select(!!rownames_column, dplyr::everything())
- ttab <- as_tabyl(ttab)
- attr(ttab, "var_names") <- var_names
- result[[table]] <- ttab
- }
- }
-
- result
-}
-
-
-
-#' Apply `stats::fisher.test()` to a two-way tabyl
-#'
-#' This generic function overrides [stats::fisher.test()]. If the passed table
-#' is a two-way tabyl, it runs it through `janitor::fisher.test.tabyl`, otherwise
-#' it just calls `stats::fisher.test()`.
-#'
-#' @return
-#' The same as the one of `stats::fisher.test()`.
-#'
-#' @param x A two-way tabyl, a numeric vector or a factor
-#' @param ... Parameters passed to [stats::fisher.test()]
-#'
-#' @examples
-#' tab <- tabyl(mtcars, gear, cyl)
-#' fisher.test(tab)
-#'
-#' @export
-
-fisher.test <- function(x, ...) {
- UseMethod("fisher.test")
-}
-
-
-#' @rdname fisher.test
-#' @method fisher.test default
-#' @param y if x is a vector, must be another vector or factor of the same length
-#' @export
-
-fisher.test.default <- function(x, y = NULL, ...) {
- # keep track of object names to keep `data.name` attribute
- if (!is.null(y)) {
- dname_x <- deparse(substitute(x))
- dname_y <- deparse(substitute(y))
- dname <- paste(dname_x, "and", dname_y)
- } else {
- dname <- deparse(substitute(x))
- }
-
- result <- stats::fisher.test(x, y, ...)
- result$data.name <- dname
-
- result
-}
-
-
-#' @rdname fisher.test
-#' @method fisher.test tabyl
-#' @export
-
-fisher.test.tabyl <- function(x, ...) {
- # keep track of object name to keep `data.name` attribute
- dname <- deparse(substitute(x))
-
- # check if table is a two-way tabyl
- if (!(inherits(x, "tabyl") && attr(x, "tabyl_type") == "two_way")) {
- stop("fisher.test.tabyl() must be applied to a two-way tabyl object")
- }
-
- # check for and remove totals row / column, if present
- if (!is.null(attr(x, "totals"))) {
- if ("row" %in% attr(x, "totals")) {
- x <- x[-nrow(x), ]
- }
- if ("col" %in% attr(x, "totals")) {
- x[ncol(x)] <- NULL
- }
- warning("janitor::fisher.test.tabyl() detected a totals row and/or column. The totals were removed from the tabyl before the test was run.
- If you intend to include the totals row and/or column in the test, first call untabyl() on the data.frame, then proceed from there.")
- }
-
- rownames(x) <- x[[1]]
-
- result <- x %>%
- dplyr::select(-1) %>%
- as.matrix() %>%
- as.table() %>%
- stats::fisher.test(...)
-
- # Replace values and attributes for strict object equality
- result$data.name <- dname
-
- result
-}
diff --git a/R/tabyl.R b/R/tabyl.R
deleted file mode 100644
index 9e534473..00000000
--- a/R/tabyl.R
+++ /dev/null
@@ -1,346 +0,0 @@
-#' Generate a frequency table (1-, 2-, or 3-way).
-#'
-#' @description
-#' A fully-featured alternative to `table()`. Results are data.frames and can be
-#' formatted and enhanced with janitor's family of `adorn_` functions.
-#'
-#' Specify a `data.frame` and the one, two, or three unquoted column names you
-#' want to tabulate. Three variables generates a list of 2-way tabyls,
-#' split by the third variable.
-#'
-#' Alternatively, you can tabulate a single variable that isn't in a `data.frame`
-#' by calling `tabyl()` on a vector, e.g., `tabyl(mtcars$gear)`.
-#'
-#' @param dat A `data.frame` containing the variables you wish to count.
-#' Or, a vector you want to tabulate.
-#' @param var1 The column name of the first variable.
-#' @param var2 (optional) the column name of the second variable
-#' (its values become the column names in a 2-way tabulation).
-#' @param var3 (optional) the column name of the third variable
-#' (a 3-way tabulation is split into a list on its values).
-#' @param show_na Should counts of `NA` values be displayed? In a one-way tabyl,
-#' the presence of `NA` values triggers an additional column showing valid percentages
-#' (calculated excluding `NA` values).
-#' @param show_missing_levels Should counts of missing levels of factors be displayed?
-#' These will be rows and/or columns of zeroes. Useful for keeping consistent
-#' output dimensions even when certain factor levels may not be present in the data.
-#' @param ... Additional arguments passed to methods.
-#' @return A `data.frame` with frequencies and percentages of the tabulated variable(s).
-#' A 3-way tabulation returns a list of data frames.
-#' @export
-#' @examples
-#'
-#' tabyl(mtcars, cyl)
-#' tabyl(mtcars, cyl, gear)
-#' tabyl(mtcars, cyl, gear, am)
-#'
-#' # or using the %>% pipe
-#' mtcars %>%
-#' tabyl(cyl, gear)
-#'
-#' # illustrating show_na functionality:
-#' my_cars <- rbind(mtcars, rep(NA, 11))
-#' my_cars %>% tabyl(cyl)
-#' my_cars %>% tabyl(cyl, show_na = FALSE)
-#'
-#' # Calling on a single vector not in a data.frame:
-#' val <- c("hi", "med", "med", "lo")
-#' tabyl(val)
-tabyl <- function(dat, ...) UseMethod("tabyl")
-
-
-
-#' @export
-#' @rdname tabyl
-# this method runs when tabyl() is called on plain vectors; tabyl_1way
-# also reverts to this method
-
-tabyl.default <- function(dat, show_na = TRUE, show_missing_levels = TRUE, ...) {
- if (is.list(dat) && !"data.frame" %in% class(dat)) {
- stop("tabyl() is meant to be called on vectors and data.frames; convert non-data.frame lists to one of these types")
- }
- # catch and adjust input variable name.
- if (is.null(names(dat)) || is.vector(dat)) {
- var_name <- deparse(substitute(dat))
- } else {
- var_name <- names(dat)
- }
-
- # useful error message if input vector doesn't exist
- if (is.null(dat)) {
- stop(paste0("object ", var_name, " not found"))
- }
- # an odd variable name can be deparsed into a vector of length >1, rare but throws warning, see issue #87
- if (length(var_name) > 1) {
- var_name <- paste(var_name, collapse = "")
- }
-
- # Try to retrieve label
- if (is.data.frame(dat)) {
- var_label <- attr(dat[, var_name], "label", exact = TRUE) %||% var_name
- } else {
- var_label <- attr(dat, "label", exact = TRUE) %||% var_name
- }
-
- # if show_na is not length-1 logical, error helpfully (#377)
- if (length(show_na) > 1 || !inherits(show_na, "logical")) {
- stop("The value supplied to the \"show_na\" argument must be TRUE or FALSE.\n\nDid you try to call tabyl on two vectors, like tabyl(data$var1, data$var2) ? To create a two-way tabyl, the two vectors must be in the same data.frame, and the function should be called like this: \n
- tabyl(data, var1, var2)
- or
- data %>% tabyl(var1, var2). \n\nSee ?tabyl for more.")
- }
-
- # calculate initial counts table
- # convert vector to a 1 col data.frame
- if (mode(dat) %in% c("logical", "numeric", "character", "list") && !is.matrix(dat)) {
- # to preserve factor properties when vec is passed in as a list from data.frame method:
- if (is.list(dat)) {
- dat <- dat[[1]]
- }
- dat_df <- data.frame(dat, stringsAsFactors = is.factor(dat))
- names(dat_df)[1] <- "dat"
- result <- dat_df %>% dplyr::count(dat)
-
- if (is.factor(dat) && show_missing_levels) {
- expanded <- tidyr::expand(result, dat)
- result <- merge( # can't use left_join b/c NA matching changed in 0.6.0
- x = expanded,
- y = result,
- by = "dat",
- all.x = TRUE,
- all.y = TRUE
- )
- result <- dplyr::arrange(result, dat) # restore sorting by factor level
- }
- } else {
- stop("input must be a vector of type logical, numeric, character, list, or factor")
- }
-
- # calculate percent, move NA row to bottom
- result <- result %>%
- dplyr::mutate(percent = n / sum(n, na.rm = TRUE))
-
- # sort the NA row to the bottom, necessary to retain factor sorting
- result <- result[order(is.na(result$dat)), ]
- result$is_na <- NULL
-
- # replace all NA values with 0 - only applies to missing factor levels
- result <- tidyr::replace_na(result, replace = list(n = 0, percent = 0))
-
- ## NA handling:
- # if there are NA values & show_na = T, calculate valid % as a new column
- if (show_na && sum(is.na(result[[1]])) > 0) {
- valid_total <- sum(result$n[!is.na(result[[1]])], na.rm = TRUE)
- result$valid_percent <- result$n / valid_total
- result$valid_percent[is.na(result[[1]])] <- NA
- } else { # don't show NA values, which necessitates adjusting the %s
- result <- result %>%
- dplyr::filter(!is.na(.[, 1])) %>%
- dplyr::mutate(percent = n / sum(n, na.rm = TRUE)) # recalculate % without NAs
- }
-
- # reassign correct variable name (or label if it exists)
- names(result)[1] <- var_label
-
- # in case input var name was "n" or "percent", call helper function to set unique names
- result <- handle_if_special_names_used(result)
-
- data.frame(result, check.names = FALSE) %>%
- as_tabyl(axes = 1)
-}
-
-
-
-#' @export
-#' @rdname tabyl
-# Main dispatching function to underlying functions depending on whether "..." contains 1, 2, or 3 variables
-tabyl.data.frame <- function(dat, var1, var2, var3, show_na = TRUE, show_missing_levels = TRUE, ...) {
- if (missing(var1) && missing(var2) && missing(var3)) {
- stop("if calling on a data.frame, specify unquoted column names(s) to tabulate. Did you mean to call tabyl() on a vector?")
- }
- if (dplyr::is_grouped_df(dat)) {
- dat <- dplyr::ungroup(dat)
- }
-
- if (missing(var2) && missing(var3) && !missing(var1)) {
- tabyl_1way(dat, rlang::enquo(var1), show_na = show_na, show_missing_levels = show_missing_levels)
- } else if (missing(var3) && !missing(var1) && !missing(var2)) {
- tabyl_2way(dat, rlang::enquo(var1), rlang::enquo(var2), show_na = show_na, show_missing_levels = show_missing_levels)
- } else if (!missing(var1) &&
- !missing(var2) &&
- !missing(var3)) {
- tabyl_3way(dat, rlang::enquo(var1), rlang::enquo(var2), rlang::enquo(var3), show_na = show_na, show_missing_levels = show_missing_levels)
- } else {
- stop("please specify var1 OR var1 & var2 OR var1 & var2 & var3")
- }
-}
-
-# a one-way frequency table; this was called "tabyl" in janitor <= 0.3.0
-tabyl_1way <- function(dat, var1, show_na = TRUE, show_missing_levels = TRUE) {
- x <- dplyr::select(dat, !!var1)
-
- # gather up arguments, pass them to tabyl.default
- arguments <- list()
- arguments$dat <- x[1]
- arguments$show_na <- show_na
- arguments$show_missing_levels <- show_missing_levels
- do.call(tabyl.default,
- args = arguments
- )
-}
-
-
-# a two-way frequency table; this was called "crosstab" in janitor <= 0.3.0
-tabyl_2way <- function(dat, var1, var2, show_na = TRUE, show_missing_levels = TRUE) {
- dat <- dplyr::select(dat, !!var1, !!var2)
-
- if (!show_na) {
- dat <- dat[!is.na(dat[[1]]) & !is.na(dat[[2]]), ]
- }
- if (nrow(dat) == 0) { # if passed a zero-length input, or an entirely NA input, return a zero-row data.frame
- message("No records to count so returning a zero-row tabyl")
- return(dat %>%
- dplyr::select(1) %>%
- dplyr::slice(0))
- }
-
- tabl <- dat %>%
- dplyr::count(!!var1, !!var2, name = "tabyl_2way_n")
-
- # Optionally expand missing factor levels.
- if (show_missing_levels) {
- tabl <- tidyr::complete(tabl, !!var1, !!var2)
- }
-
- # replace NA with string NA_ in vec2 to avoid invalid col name after spreading
- # if this col is a factor, need to add that level to the factor
- if (is.numeric(tabl[[2]])) { # have numerics treated like factors to not spread alphabetically
- tabl[[2]] <- ordered(tabl[[2]], levels = unique(tabl[[2]]))
- }
- if (is.factor(tabl[[2]])) {
- levels(tabl[[2]]) <- c(levels(tabl[[2]]), "emptystring_", "NA_")
- } else {
- tabl[2] <- as.character(tabl[[2]])
- }
- tabl[2][is.na(tabl[2])] <- "NA_"
- tabl[2][tabl[2] == ""] <- "emptystring_"
- result <- tabl %>%
- tidyr::pivot_wider(
- names_from = !!var2,
- values_from = "tabyl_2way_n",
- values_fn = ~ dplyr::coalesce(.x, 0L),
- values_fill = 0L,
- names_sort = TRUE
- )
- if ("emptystring_" %in% names(result)) {
- result <- result[c(setdiff(names(result), "emptystring_"), "emptystring_")]
- if (getOption("tabyl.emptystring", TRUE) & interactive()) {
- message("The tabyl's column variable contained the empty string value, \"\". This is not a legal column name and has been converted to \"emptystring_\".\nConsider converting \"\" to NA if appropriate.\nThis message is shown once per session and may be disabled by setting options(\"tabyl.emptystring\" = FALSE).") # nocov
- options("tabyl.emptystring" = FALSE) # nocov
- }
- }
- if ("NA_" %in% names(result)) {
- # move NA_ column to end, from http://stackoverflow.com/a/18339562
- result <- result[c(setdiff(names(result), "NA_"), "NA_")]
- }
-
- row_var_name <- names(dat)[1]
- col_var_name <- names(dat)[2]
- names(result)[1] <- attr(dat[, 1], "label", exact = TRUE) %||% names(result)[1]
- data.frame(result, check.names = FALSE) %>%
- as_tabyl(axes = 2, row_var_name = row_var_name, col_var_name = col_var_name)
-}
-
-
-# a list of two-way frequency tables, split into a list on a third variable
-tabyl_3way <- function(dat, var1, var2, var3, show_na = TRUE, show_missing_levels = TRUE) {
- dat <- dplyr::select(dat, !!var1, !!var2, !!var3)
- var3_numeric <- is.numeric(dat[[3]])
-
- # Preserve labels, as attributes are sometimes dropped during transformations.
- var1_label <- attr(dat[, 1], "label", exact = TRUE)
- var2_label <- attr(dat[, 2], "label", exact = TRUE)
-
- # Keep factor levels for ordering the list at the end
- if (is.factor(dat[[3]])) {
- third_levels_for_sorting <- levels(dat[[3]])
- }
- dat[[3]] <- as.character(dat[[3]]) # don't want empty factor levels in the result list - they would be empty data.frames
-
- # grab class of 1st variable to restore it later
- col1_class <- class(dat[[1]])
- col1_levels <- NULL
- if (is.factor(dat[[1]])) {
- col1_levels <- levels(dat[[1]])
- }
-
- # print NA level as its own data.frame, and make it appear last
- if (show_na && sum(is.na(dat[[3]])) > 0) {
- dat[[3]] <- factor(dat[[3]], levels = c(sort(unique(dat[[3]])), "NA_"))
- dat[[3]][is.na(dat[[3]])] <- "NA_"
- if (exists("third_levels_for_sorting")) {
- third_levels_for_sorting <- c(third_levels_for_sorting, "NA_")
- }
- }
-
- if (show_missing_levels) { # needed to have each crosstab in the list aware of all values in the pre-split variables
- dat[[1]] <- as.factor(dat[[1]])
- dat[[2]] <- as.factor(dat[[2]])
- }
-
- result <- split(dat, dat[[rlang::quo_name(var3)]])
- # split() drops attributes, so we manually add back the label attributes.
- result <- lapply(result, function(x) {
- attr(x[[1]], "label") <- var1_label
- attr(x[[2]], "label") <- var2_label
- x
- })
- result <- result %>%
- purrr::map(tabyl_2way, var1, var2, show_na = show_na, show_missing_levels = show_missing_levels) %>%
- purrr::map(reset_1st_col_status, col1_class, col1_levels) # reset class of var in 1st col to its input class, #168
-
- # reorder when var 3 is a factor, per #250
- if (exists("third_levels_for_sorting")) {
- result <- result[order(third_levels_for_sorting[third_levels_for_sorting %in% unique(dat[[3]])])]
- }
-
- if (var3_numeric) {
- result <- result[order(suppressWarnings(as.numeric(names(result))), na.last = TRUE)]
- }
-
- result
-}
-
-### Helper functions called by tabyl() ------------
-
-# function that checks if col 1 name is "n" or "percent",
-## if so modifies the appropriate other column name to avoid duplicates
-handle_if_special_names_used <- function(dat) {
- if (names(dat)[1] == "n") {
- names(dat)[2] <- "n_n"
- } else if (names(dat)[1] == "percent") {
- names(dat)[3] <- "percent_percent"
- }
- dat
-}
-
-# reset the 1st col's class of a data.frame to a provided class
-# also reset in tabyl's core
-reset_1st_col_status <- function(dat, new_class, lvls) {
- if ("factor" %in% new_class) {
- dat[[1]] <- factor(dat[[1]],
- levels = lvls,
- ordered = ("ordered" %in% new_class)
- )
- attr(dat, "core")[[1]] <- factor(attr(dat, "core")[[1]],
- levels = lvls,
- ordered = ("ordered" %in% new_class)
- )
- } else {
- dat[[1]] <- as.character(dat[[1]]) # first do as.character in case eventual class is numeric
- class(dat[[1]]) <- new_class
- attr(dat, "core")[[1]] <- as.character(attr(dat, "core")[[1]])
- class(attr(dat, "core")[[1]]) <- new_class
- }
- dat
-}
diff --git a/R/top_levels.R b/R/top_levels.R
deleted file mode 100644
index 87320c11..00000000
--- a/R/top_levels.R
+++ /dev/null
@@ -1,65 +0,0 @@
-#' Generate a frequency table of a factor grouped into top-n, bottom-n, and all
-#' other levels.
-#'
-#' Get a frequency table of a factor variable, grouped into categories by level.
-#'
-#' @param input_vec The factor variable to tabulate.
-#' @param n Number of levels to include in top and bottom groups
-#' @param show_na Should cases where the variable is `NA` be shown?
-#' @return A `data.frame` (actually a `tbl_df`) with the frequencies of the
-#' grouped, tabulated variable. Includes counts and percentages, and valid
-#' percentages (calculated omitting `NA` values, if present in the vector and
-#' `show_na = TRUE`.)
-#' @export
-#' @examples
-#' top_levels(as.factor(mtcars$hp), 2)
-top_levels <- function(input_vec, n = 2, show_na = FALSE) {
- # Initial type error catching
- if (!is.factor(input_vec)) {
- stop("factor_vec is not of type 'factor'")
- }
-
- num_levels_in_var <- nlevels(input_vec)
-
- # handle bad inputs
- if (!num_levels_in_var > 2) {
- stop("input factor variable must have at least 3 levels")
- }
- if (num_levels_in_var < 2 * n) {
- stop(paste0(
- "there are ", num_levels_in_var, " levels in the variable and ",
- n, " levels in each of the top and bottom groups.\nSince 2 * ", n, " = ", 2 * n,
- " is greater than ", num_levels_in_var, ",
- there would be overlap in the top and bottom groups and some records will be double-counted."
- ))
- }
- if (n < 1 || n %% 1 != 0) {
- stop("n must be a whole number at least 1")
- }
-
- var_name <- deparse(substitute(input_vec))
-
- # Identify top/mid/bottom group labels for printing
- groups <- get_level_groups(input_vec, n, num_levels_in_var)
-
- # convert input vector into grouped variable
- new_vec <- ifelse(as.numeric(input_vec) <= n,
- groups$top,
- ifelse(as.numeric(input_vec) > (num_levels_in_var - n),
- groups$bot,
- groups$mid
- )
- )
-
- # recode variable as hi-med-lo factor so table prints w/ correct sorting
- if (!is.na(groups$mid)) {
- new_vec <- factor(new_vec, levels = c(groups$top, groups$mid, groups$bot))
- } else {
- new_vec <- factor(new_vec, levels = c(groups$top, groups$bot))
- }
-
- # tabulate grouped variable, then reset name to match input variable name
- result <- tabyl(new_vec, show_na = show_na)
- names(result)[1] <- var_name
- result
-}
diff --git a/README.Rmd b/README.Rmd
index ed66eec6..fba821cc 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -143,56 +143,6 @@ roster %>% get_dupes(contains("name"))
Yes, some teachers appear twice. We ought to address this before counting employees.
-#### Tabulating tools
-A variable (or combinations of two or three variables) can be tabulated with `tabyl()`. The resulting data.frame can be tweaked and formatted
-with the suite of `adorn_` functions for quick analysis and printing of pretty results in a report. `adorn_` functions can be helpful with non-tabyls, too.
-
-#### `tabyl()`
-
-Like `table()`, but pipe-able, data.frame-based, and fully featured.
-
-`tabyl()` can be called two ways:
-
-* On a vector, when tabulating a single variable: `tabyl(roster$subject)`
-* On a data.frame, specifying 1, 2, or 3 variable names to tabulate: `roster %>% tabyl(subject, employee_status)`.
- * Here the data.frame is passed in with the `%>%` pipe; this allows `tabyl` to be used in an analysis pipeline
-
-One variable:
-```{r}
-roster %>%
- tabyl(subject)
-```
-
-Two variables:
-```{r}
-roster %>%
- filter(hire_date > as.Date("1950-01-01")) %>%
- tabyl(employee_status, full_time)
-```
-
-Three variables:
-```{r}
-roster %>%
- tabyl(full_time, subject, employee_status, show_missing_levels = FALSE)
-```
-
-#### Adorning tabyls
-The `adorn_` functions dress up the results of these tabulation calls for fast, basic reporting. Here are some of the functions that augment a summary table for reporting:
-
-```{r}
-roster %>%
- tabyl(employee_status, full_time) %>%
- adorn_totals("row") %>%
- adorn_percentages("row") %>%
- adorn_pct_formatting() %>%
- adorn_ns() %>%
- adorn_title("combined")
-```
-
-Pipe that right into `knitr::kable()` in your RMarkdown report.
-
-These modular adornments can be layered to reduce R's deficit against Excel and SPSS when it comes to quick, informative counts. Learn more about `tabyl()` and the `adorn_` functions from the [tabyls vignette](https://sfirke.github.io/janitor/articles/tabyls.html).
-
## Contact me
You are welcome to:
diff --git a/man/add_totals_col.Rd b/man/add_totals_col.Rd
deleted file mode 100644
index c0c0c38c..00000000
--- a/man/add_totals_col.Rd
+++ /dev/null
@@ -1,20 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/janitor_deprecated.R
-\name{add_totals_col}
-\alias{add_totals_col}
-\title{Append a totals column to a data.frame.}
-\usage{
-add_totals_col(dat, na.rm = TRUE)
-}
-\arguments{
-\item{dat}{an input data.frame with at least one numeric column.}
-
-\item{na.rm}{should missing values (including NaN) be omitted from the calculations?}
-}
-\value{
-Returns a data.frame with a totals column containing row-wise sums.
-}
-\description{
-This function is deprecated, use \code{\link[=adorn_totals]{adorn_totals(where = "col")}} instead.
-}
-\keyword{internal}
diff --git a/man/add_totals_row.Rd b/man/add_totals_row.Rd
deleted file mode 100644
index d6dc8883..00000000
--- a/man/add_totals_row.Rd
+++ /dev/null
@@ -1,22 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/janitor_deprecated.R
-\name{add_totals_row}
-\alias{add_totals_row}
-\title{Append a totals row to a data.frame.}
-\usage{
-add_totals_row(dat, fill = "-", na.rm = TRUE)
-}
-\arguments{
-\item{dat}{an input data.frame with at least one numeric column.}
-
-\item{fill}{if there are more than one non-numeric columns, what string should fill the bottom row of those columns?}
-
-\item{na.rm}{should missing values (including NaN) be omitted from the calculations?}
-}
-\value{
-Returns a data.frame with a totals row, consisting of "Total" in the first column and column sums in the others.
-}
-\description{
-This function is deprecated, use \code{\link[=adorn_totals]{adorn_totals()}} instead.
-}
-\keyword{internal}
diff --git a/man/adorn_crosstab.Rd b/man/adorn_crosstab.Rd
deleted file mode 100644
index f3a94e42..00000000
--- a/man/adorn_crosstab.Rd
+++ /dev/null
@@ -1,35 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/janitor_deprecated.R
-\name{adorn_crosstab}
-\alias{adorn_crosstab}
-\title{Add presentation formatting to a crosstabulation table.}
-\usage{
-adorn_crosstab(
- dat,
- denom = "row",
- show_n = TRUE,
- digits = 1,
- show_totals = FALSE,
- rounding = "half to even"
-)
-}
-\arguments{
-\item{dat}{a data.frame with row names in the first column and numeric values in all other columns. Usually the piped-in result of a call to \code{crosstab} that included the argument \code{percent = "none"}.}
-
-\item{denom}{the denominator to use for calculating percentages. One of "row", "col", or "all".}
-
-\item{show_n}{should counts be displayed alongside the percentages?}
-
-\item{digits}{how many digits should be displayed after the decimal point?}
-
-\item{show_totals}{display a totals summary? Will be a row, column, or both depending on the value of \code{denom}.}
-
-\item{rounding}{method to use for truncating percentages - either "half to even", the base R default method, or "half up", where 14.5 rounds up to 15.}
-}
-\value{
-Returns a data.frame.
-}
-\description{
-This function is deprecated, use \code{\link[=tabyl]{tabyl()}} with the \code{adorn_} family of functions instead.
-}
-\keyword{internal}
diff --git a/man/adorn_ns.Rd b/man/adorn_ns.Rd
deleted file mode 100644
index aeaa0b43..00000000
--- a/man/adorn_ns.Rd
+++ /dev/null
@@ -1,82 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/adorn_ns.R
-\name{adorn_ns}
-\alias{adorn_ns}
-\title{Add underlying Ns to a tabyl displaying percentages.}
-\usage{
-adorn_ns(
- dat,
- position = "rear",
- ns = attr(dat, "core"),
- format_func = function(x) {
- format(x, big.mark = ",")
- },
- ...
-)
-}
-\arguments{
-\item{dat}{A data.frame of class \code{tabyl} that has had \code{adorn_percentages} and/or
-\code{adorn_pct_formatting} called on it. If given a list of data.frames,
-this function will apply itself to each data.frame in the list (designed for 3-way \code{tabyl} lists).}
-
-\item{position}{Should the N go in the front, or in the rear, of the percentage?}
-
-\item{ns}{The Ns to append. The default is the "core" attribute of the input tabyl
-\code{dat}, where the original Ns of a two-way \code{tabyl} are stored. However, if your Ns
-are stored somewhere else, or you need to customize them beyond what can be done
-with \code{format_func}, you can supply them here.}
-
-\item{format_func}{A formatting function to run on the Ns. Consider defining
-with \code{\link[base:format]{base::format()}}.}
-
-\item{...}{Columns to adorn. This takes a tidyselect specification. By default,
-all columns are adorned except for the first column and columns not of class
-\code{numeric}, but this allows you to manually specify which columns should be adorned,
-for use on a data.frame that does not result from a call to \code{tabyl}.}
-}
-\value{
-A \code{data.frame} with Ns appended
-}
-\description{
-This function adds back the underlying Ns to a \code{tabyl} whose percentages were
-calculated using \code{\link[=adorn_percentages]{adorn_percentages()}}, to display the Ns and percentages together.
-You can also call it on a non-tabyl data.frame to which you wish to append Ns.
-}
-\examples{
-mtcars \%>\%
- tabyl(am, cyl) \%>\%
- adorn_percentages("col") \%>\%
- adorn_pct_formatting() \%>\%
- adorn_ns(position = "front")
-
-# Format the Ns with a custom format_func:
-set.seed(1)
-bigger_dat <- data.frame(
- sex = rep(c("m", "f"), 3000),
- age = round(runif(3000, 1, 102), 0)
-)
-bigger_dat$age_group <- cut(bigger_dat$age, quantile(bigger_dat$age, c(0, 1 / 3, 2 / 3, 1)))
-
-bigger_dat \%>\%
- tabyl(age_group, sex, show_missing_levels = FALSE) \%>\%
- adorn_totals(c("row", "col")) \%>\%
- adorn_percentages("col") \%>\%
- adorn_pct_formatting(digits = 1) \%>\%
- adorn_ns(format_func = function(x) format(x, big.mark = ".", decimal.mark = ","))
-# Control the columns to be adorned with the ... variable selection argument
-# If using only the ... argument, you can use empty commas as shorthand
-# to supply the default values to the preceding arguments:
-
-cases <- data.frame(
- region = c("East", "West"),
- year = 2015,
- recovered = c(125, 87),
- died = c(13, 12)
-)
-
-cases \%>\%
- adorn_percentages("col",,recovered:died) \%>\%
- adorn_pct_formatting(,,,,,recovered:died) \%>\%
- adorn_ns(,,,recovered:died)
-
-}
diff --git a/man/adorn_pct_formatting.Rd b/man/adorn_pct_formatting.Rd
deleted file mode 100644
index 13a49c6a..00000000
--- a/man/adorn_pct_formatting.Rd
+++ /dev/null
@@ -1,71 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/adorn_pct_formatting.R
-\name{adorn_pct_formatting}
-\alias{adorn_pct_formatting}
-\title{Format a \code{data.frame} of decimals as percentages.}
-\usage{
-adorn_pct_formatting(
- dat,
- digits = 1,
- rounding = "half to even",
- affix_sign = TRUE,
- ...
-)
-}
-\arguments{
-\item{dat}{a data.frame with decimal values, typically the result of a call
-to \code{adorn_percentages} on a \code{tabyl}. If given a list of data.frames, this
-function will apply itself to each data.frame in the list (designed for
-3-way \code{tabyl} lists).}
-
-\item{digits}{how many digits should be displayed after the decimal point?}
-
-\item{rounding}{method to use for rounding - either "half to even", the base
-R default method, or "half up", where 14.5 rounds up to 15.}
-
-\item{affix_sign}{should the \% sign be affixed to the end?}
-
-\item{...}{columns to adorn. This takes a tidyselect specification. By
-default, all numeric columns (besides the initial column, if numeric) are
-adorned, but this allows you to manually specify which columns should be
-adorned, for use on a data.frame that does not result from a call to
-\code{tabyl}.}
-}
-\value{
-a data.frame with formatted percentages
-}
-\description{
-Numeric columns get multiplied by 100 and formatted as
-percentages according to user specifications. This function defaults to
-excluding the first column of the input data.frame, assuming that it contains
-a descriptive variable, but this can be overridden by specifying the columns
-to adorn in the \code{...} argument. Non-numeric columns are always excluded.
-
-The decimal separator character is the result of \code{getOption("OutDec")}, which
-is based on the user's locale. If the default behavior is undesirable,
-change this value ahead of calling the function, either by changing locale or
-with \code{options(OutDec = ",")}. This aligns the decimal separator character
-with that used in \code{base::print()}.
-}
-\examples{
-mtcars \%>\%
- tabyl(am, cyl) \%>\%
- adorn_percentages("col") \%>\%
- adorn_pct_formatting()
-
-# Control the columns to be adorned with the ... variable selection argument
-# If using only the ... argument, you can use empty commas as shorthand
-# to supply the default values to the preceding arguments:
-
-cases <- data.frame(
- region = c("East", "West"),
- year = 2015,
- recovered = c(125, 87),
- died = c(13, 12)
-)
-
-cases \%>\%
- adorn_percentages("col", , recovered:died) \%>\%
- adorn_pct_formatting(, , , recovered:died)
-
-}
diff --git a/man/adorn_percentages.Rd b/man/adorn_percentages.Rd
deleted file mode 100644
index 0d6b8714..00000000
--- a/man/adorn_percentages.Rd
+++ /dev/null
@@ -1,57 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/adorn_percentages.R
-\name{adorn_percentages}
-\alias{adorn_percentages}
-\title{Convert a data.frame of counts to percentages.}
-\usage{
-adorn_percentages(dat, denominator = "row", na.rm = TRUE, ...)
-}
-\arguments{
-\item{dat}{A \code{tabyl} or other data.frame with a tabyl-like layout.
-If given a list of data.frames, this function will apply itself to each
-\code{data.frame} in the list (designed for 3-way \code{tabyl} lists).}
-
-\item{denominator}{The direction to use for calculating percentages.
-One of "row", "col", or "all".}
-
-\item{na.rm}{should missing values (including \code{NaN}) be omitted from the calculations?}
-
-\item{...}{columns to adorn. This takes a <\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}>
-specification. By default, all numeric columns (besides the initial column, if numeric)
-are adorned, but this allows you to manually specify which columns should
-be adorned, for use on a \code{data.frame} that does not result from a call to \code{\link[=tabyl]{tabyl()}}.}
-}
-\value{
-A \code{data.frame} of percentages, expressed as numeric values between 0 and 1.
-}
-\description{
-This function defaults to excluding the first column of the input data.frame,
-assuming that it contains a descriptive variable, but this can be overridden
-by specifying the columns to adorn in the \code{...} argument.
-}
-\examples{
-
-mtcars \%>\%
- tabyl(am, cyl) \%>\%
- adorn_percentages("col")
-
-# calculates correctly even with totals column and/or row:
-mtcars \%>\%
- tabyl(am, cyl) \%>\%
- adorn_totals("row") \%>\%
- adorn_percentages()
-
-# Control the columns to be adorned with the ... variable selection argument
-# If using only the ... argument, you can use empty commas as shorthand
-# to supply the default values to the preceding arguments:
-
-cases <- data.frame(
- region = c("East", "West"),
- year = 2015,
- recovered = c(125, 87),
- died = c(13, 12)
-)
-
-cases \%>\%
- adorn_percentages(, , recovered:died)
-}
diff --git a/man/adorn_rounding.Rd b/man/adorn_rounding.Rd
deleted file mode 100644
index 409ce000..00000000
--- a/man/adorn_rounding.Rd
+++ /dev/null
@@ -1,67 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/adorn_rounding.R
-\name{adorn_rounding}
-\alias{adorn_rounding}
-\title{Round the numeric columns in a data.frame.}
-\usage{
-adorn_rounding(dat, digits = 1, rounding = "half to even", ...)
-}
-\arguments{
-\item{dat}{A \code{tabyl} or other \code{data.frame} with similar layout.
-If given a list of data.frames, this function will apply itself to each
-\code{data.frame} in the list (designed for 3-way \code{tabyl} lists).}
-
-\item{digits}{How many digits should be displayed after the decimal point?}
-
-\item{rounding}{Method to use for rounding - either "half to even"
-(the base R default method), or "half up", where 14.5 rounds up to 15.}
-
-\item{...}{Columns to adorn. This takes a tidyselect specification.
-By default, all numeric columns (besides the initial column, if numeric)
-are adorned, but this allows you to manually specify which columns should
-be adorned, for use on a data.frame that does not result from a call to \code{tabyl}.}
-}
-\value{
-The \code{data.frame} with rounded numeric columns.
-}
-\description{
-Can run on any \code{data.frame} with at least one numeric column.
-This function defaults to excluding the first column of the input data.frame,
-assuming that it contains a descriptive variable, but this can be overridden by
-specifying the columns to round in the \code{...} argument.
-
-If you're formatting percentages, e.g., the result of \code{\link[=adorn_percentages]{adorn_percentages()}},
-use \code{\link[=adorn_pct_formatting]{adorn_pct_formatting()}} instead. This is a more flexible variant for ad-hoc usage.
-Compared to \code{adorn_pct_formatting()}, it does not multiply by 100 or pad the
-numbers with spaces for alignment in the results \code{data.frame}.
-This function retains the class of numeric input columns.
-}
-\examples{
-
-mtcars \%>\%
- tabyl(am, cyl) \%>\%
- adorn_percentages() \%>\%
- adorn_rounding(digits = 2, rounding = "half up")
-
-# tolerates non-numeric columns:
-library(dplyr)
-mtcars \%>\%
- tabyl(am, cyl) \%>\%
- adorn_percentages("all") \%>\%
- mutate(dummy = "a") \%>\%
- adorn_rounding()
-
-# Control the columns to be adorned with the ... variable selection argument
-# If using only the ... argument, you can use empty commas as shorthand
-# to supply the default values to the preceding arguments:
-cases <- data.frame(
- region = c("East", "West"),
- year = 2015,
- recovered = c(125, 87),
- died = c(13, 12)
-)
-
-cases \%>\%
- adorn_percentages(, , ends_with("ed")) \%>\%
- adorn_rounding(, , all_of(c("recovered", "died")))
-}
diff --git a/man/adorn_title.Rd b/man/adorn_title.Rd
deleted file mode 100644
index 0bdd54f3..00000000
--- a/man/adorn_title.Rd
+++ /dev/null
@@ -1,58 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/adorn_title.R
-\name{adorn_title}
-\alias{adorn_title}
-\title{Add column name to the top of a two-way tabyl.}
-\usage{
-adorn_title(dat, placement = "top", row_name, col_name)
-}
-\arguments{
-\item{dat}{A \code{data.frame} of class \code{tabyl} or other \code{data.frame} with a tabyl-like layout.
-If given a list of data.frames, this function will apply itself to each \code{data.frame}
-in the list (designed for 3-way \code{tabyl} lists).}
-
-\item{placement}{The title placement, one of \code{"top"}, or \code{"combined"}.
-See \strong{Details} for more information.}
-
-\item{row_name}{(optional) default behavior is to pull the row name from the
-attributes of the input \code{tabyl} object. If you wish to override that text,
-or if your input is not a \code{tabyl}, supply a string here.}
-
-\item{col_name}{(optional) default behavior is to pull the column_name from
-the attributes of the input \code{tabyl} object. If you wish to override that text,
-or if your input is not a \code{tabyl}, supply a string here.}
-}
-\value{
-The input \code{tabyl}, augmented with the column title. Non-tabyl inputs
-that are of class \code{tbl_df} are downgraded to basic data.frames so that the
-title row prints correctly.
-}
-\description{
-This function adds the column variable name to the top of a \code{tabyl} for a
-complete display of information. This makes the tabyl prettier, but renders
-the \code{data.frame} less useful for further manipulation.
-}
-\details{
-The \code{placement} argument indicates whether the column name should be added to
-the \code{top} of the tabyl in an otherwise-empty row \code{"top"} or appended to the
-already-present row name variable (\code{"combined"}). The formatting in the \code{"top"}
-option has the look of base R's \code{table()}; it also wipes out the other column
-names, making it hard to further use the \code{data.frame} besides formatting it for reporting.
-The \code{"combined"} option is more conservative in this regard.
-}
-\examples{
-
-mtcars \%>\%
- tabyl(am, cyl) \%>\%
- adorn_title(placement = "top")
-
-# Adding a title to a non-tabyl
-library(tidyr)
-library(dplyr)
-mtcars \%>\%
- group_by(gear, am) \%>\%
- summarise(avg_mpg = mean(mpg), .groups = "drop") \%>\%
- pivot_wider(names_from = am, values_from = avg_mpg) \%>\%
- adorn_rounding() \%>\%
- adorn_title("top", row_name = "Gears", col_name = "Cylinders")
-}
diff --git a/man/adorn_totals.Rd b/man/adorn_totals.Rd
deleted file mode 100644
index 2f6a55ef..00000000
--- a/man/adorn_totals.Rd
+++ /dev/null
@@ -1,49 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/adorn_totals.R
-\name{adorn_totals}
-\alias{adorn_totals}
-\title{Append a totals row and/or column to a data.frame}
-\usage{
-adorn_totals(dat, where = "row", fill = "-", na.rm = TRUE, name = "Total", ...)
-}
-\arguments{
-\item{dat}{An input \code{data.frame} with at least one numeric column. If given a
-list of data.frames, this function will apply itself to each \code{data.frame}
-in the list (designed for 3-way \code{tabyl} lists).}
-
-\item{where}{One of "row", "col", or \code{c("row", "col")}}
-
-\item{fill}{If there are non-numeric columns, what should fill the bottom row
-of those columns? If a string, relevant columns will be coerced to character.
-If \code{NA} then column types are preserved.}
-
-\item{na.rm}{Should missing values (including \code{NaN}) be omitted from the calculations?}
-
-\item{name}{Name of the totals row and/or column. If both are created, and
-\code{name} is a single string, that name is applied to both. If both are created
-and \code{name} is a vector of length 2, the first element of the vector will be
-used as the row name (in column 1), and the second element will be used as the
-totals column name. Defaults to "Total".}
-
-\item{...}{Columns to total. This takes a tidyselect specification. By default,
-all numeric columns (besides the initial column, if numeric) are included in
-the totals, but this allows you to manually specify which columns should be
-included, for use on a data.frame that does not result from a call to \code{tabyl}.}
-}
-\value{
-A \code{data.frame} augmented with a totals row, column, or both.
-The \code{data.frame} is now also of class \code{tabyl} and stores information about
-the attached totals and underlying data in the tabyl attributes.
-}
-\description{
-This function defaults to excluding the first column of the input data.frame,
-assuming that it contains a descriptive variable, but this can be overridden
-by specifying the columns to be totaled in the \code{...} argument. Non-numeric
-columns are converted to character class and have a user-specified fill character
-inserted in the totals row.
-}
-\examples{
-mtcars \%>\%
- tabyl(am, cyl) \%>\%
- adorn_totals()
-}
diff --git a/man/as_tabyl.Rd b/man/as_tabyl.Rd
deleted file mode 100644
index 635898d1..00000000
--- a/man/as_tabyl.Rd
+++ /dev/null
@@ -1,55 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/as_and_untabyl.R
-\name{as_tabyl}
-\alias{as_tabyl}
-\title{Add \code{tabyl} attributes to a data.frame}
-\usage{
-as_tabyl(dat, axes = 2, row_var_name = NULL, col_var_name = NULL)
-}
-\arguments{
-\item{dat}{a data.frame with variable values in the first column and numeric
-values in all other columns.}
-
-\item{axes}{is this a two_way tabyl or a one_way tabyl? If this function is
-being called by a user, this should probably be "2". One-way tabyls are
-created by \code{tabyl} but are a special case.}
-
-\item{row_var_name}{(optional) the name of the variable in the row dimension;
-used by \code{adorn_title()}.}
-
-\item{col_var_name}{(optional) the name of the variable in the column
-dimension; used by \code{adorn_title()}.}
-}
-\value{
-Returns the same data.frame, but with the additional class of "tabyl"
-and the attribute "core".
-}
-\description{
-A \code{tabyl} is a \code{data.frame} containing counts of a variable or
-co-occurrences of two variables (a.k.a., a contingency table or crosstab).
-This specialized kind of data.frame has attributes that enable \code{adorn_}
-functions to be called for precise formatting and presentation of results.
-E.g., display results as a mix of percentages, Ns, add totals rows or
-columns, rounding options, in the style of Microsoft Excel PivotTable.
-
-A \code{tabyl} can be the result of a call to \code{janitor::tabyl()}, in which case
-these attributes are added automatically. This function adds \code{tabyl} class
-attributes to a data.frame that isn't the result of a call to \code{tabyl} but
-meets the requirements of a two-way tabyl: 1) First column contains values of
-variable 1 2) Column names 2:n are the values of variable 2 3) Numeric values
-in columns 2:n are counts of the co-occurrences of the two variables.*
-\itemize{
-\item = this is the ideal form of a \code{tabyl}, but janitor's \code{adorn_} functions tolerate
-and ignore non-numeric columns in positions 2:n.
-}
-
-For instance, the result of \code{\link[dplyr:count]{dplyr::count()}} followed by \code{\link[tidyr:pivot_wider]{tidyr::pivot_wider()}}
-can be treated as a \code{tabyl}.
-
-The result of calling \code{\link[=tabyl]{tabyl()}} on a single variable is a special class of
-one-way tabyl; this function only pertains to the two-way tabyl.
-}
-\examples{
-as_tabyl(mtcars)
-
-}
diff --git a/man/chisq.test.Rd b/man/chisq.test.Rd
deleted file mode 100644
index 675d948b..00000000
--- a/man/chisq.test.Rd
+++ /dev/null
@@ -1,40 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/statistical_tests.R
-\name{chisq.test}
-\alias{chisq.test}
-\alias{chisq.test.default}
-\alias{chisq.test.tabyl}
-\title{Apply \code{stats::chisq.test()} to a two-way tabyl}
-\usage{
-chisq.test(x, ...)
-
-\method{chisq.test}{default}(x, y = NULL, ...)
-
-\method{chisq.test}{tabyl}(x, tabyl_results = TRUE, ...)
-}
-\arguments{
-\item{x}{a two-way tabyl, a numeric vector or a factor}
-
-\item{...}{other parameters passed to \code{\link[stats:chisq.test]{stats::chisq.test()}}}
-
-\item{y}{if x is a vector, must be another vector or factor of the same length}
-
-\item{tabyl_results}{If \code{TRUE} and \code{x} is a tabyl object,
-also return \code{observed}, \code{expected}, \code{residuals} and \code{stdres} as tabyl.}
-}
-\value{
-The result is the same as the one of \code{stats::chisq.test()}.
-If \code{tabyl_results} is \code{TRUE}, the returned tables \code{observed}, \code{expected},
-\code{residuals} and \code{stdres} are converted to tabyls.
-}
-\description{
-This generic function overrides \code{stats::chisq.test}. If the passed table
-is a two-way tabyl, it runs it through janitor::chisq.test.tabyl, otherwise
-it just calls \code{stats::chisq.test()}.
-}
-\examples{
-tab <- tabyl(mtcars, gear, cyl)
-chisq.test(tab)
-chisq.test(tab)$residuals
-
-}
diff --git a/man/crosstab.Rd b/man/crosstab.Rd
deleted file mode 100644
index 32f44354..00000000
--- a/man/crosstab.Rd
+++ /dev/null
@@ -1,15 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/janitor_deprecated.R
-\name{crosstab}
-\alias{crosstab}
-\title{Generate a crosstabulation of two vectors.}
-\usage{
-crosstab(...)
-}
-\arguments{
-\item{...}{arguments}
-}
-\description{
-This function is deprecated, use \code{\link[=tabyl]{tabyl(dat, var1, var2)}} instead.
-}
-\keyword{internal}
diff --git a/man/fisher.test.Rd b/man/fisher.test.Rd
deleted file mode 100644
index dd54d990..00000000
--- a/man/fisher.test.Rd
+++ /dev/null
@@ -1,34 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/statistical_tests.R
-\name{fisher.test}
-\alias{fisher.test}
-\alias{fisher.test.default}
-\alias{fisher.test.tabyl}
-\title{Apply \code{stats::fisher.test()} to a two-way tabyl}
-\usage{
-fisher.test(x, ...)
-
-\method{fisher.test}{default}(x, y = NULL, ...)
-
-\method{fisher.test}{tabyl}(x, ...)
-}
-\arguments{
-\item{x}{A two-way tabyl, a numeric vector or a factor}
-
-\item{...}{Parameters passed to \code{\link[stats:fisher.test]{stats::fisher.test()}}}
-
-\item{y}{if x is a vector, must be another vector or factor of the same length}
-}
-\value{
-The same as the one of \code{stats::fisher.test()}.
-}
-\description{
-This generic function overrides \code{\link[stats:fisher.test]{stats::fisher.test()}}. If the passed table
-is a two-way tabyl, it runs it through \code{janitor::fisher.test.tabyl}, otherwise
-it just calls \code{stats::fisher.test()}.
-}
-\examples{
-tab <- tabyl(mtcars, gear, cyl)
-fisher.test(tab)
-
-}
diff --git a/man/janitor_deprecated.Rd b/man/janitor_deprecated.Rd
index 6b9cb5c2..c0906722 100644
--- a/man/janitor_deprecated.Rd
+++ b/man/janitor_deprecated.Rd
@@ -8,12 +8,8 @@ These functions have already become defunct or may be defunct as soon as the nex
}
\details{
\itemize{
-\item \code{\link[=adorn_crosstab]{adorn_crosstab()}} -> \code{adorn_}
-\item \code{\link[=crosstab]{crosstab()}} -> \code{\link[=tabyl]{tabyl()}}
\item \code{\link[=use_first_valid_of]{use_first_valid_of()}} -> \code{\link[dplyr:coalesce]{dplyr::coalesce()}}
\item \code{\link[=convert_to_NA]{convert_to_NA()}} -> \code{\link[dplyr:na_if]{dplyr::na_if()}}
-\item \code{\link[=add_totals_col]{add_totals_col()}} -> \code{\link[=adorn_totals]{adorn_totals(where = "col")}}
-\item \code{\link[=add_totals_row]{add_totals_row()}} -> \code{\link[=adorn_totals]{adorn_totals()}}
\item \code{\link[=remove_empty_rows]{remove_empty_rows()}} -> \code{\link[=remove_empty]{remove_empty("rows")}}
\item \code{\link[=remove_empty_cols]{remove_empty_cols()}} -> \code{\link[=remove_empty]{remove_empty("cols")}}
}
diff --git a/man/remove_empty.Rd b/man/remove_empty.Rd
index 66652c27..f9d14dce 100644
--- a/man/remove_empty.Rd
+++ b/man/remove_empty.Rd
@@ -13,8 +13,8 @@ remove_empty(dat, which = c("rows", "cols"), cutoff = 1, quiet = TRUE)
value of which is provided, defaults to removing both empty rows and empty
columns, declaring the behavior with a printed message.}
-\item{cutoff}{Under what fraction (>0 to <=1) of non-empty rows or columns should
-\code{which} be removed? Lower values keep more rows/columns, higher values drop more.}
+\item{cutoff}{a row/col should have more than this fraction of non-NA values to be
+retained. E.g., \code{cutoff = 0.8} means that rows/cols that are 20\% or more missing will be dropped.}
\item{quiet}{Should messages be suppressed (\code{TRUE}) or printed
(\code{FALSE}) indicating the summary of empty columns or rows removed?}
diff --git a/man/tabyl.Rd b/man/tabyl.Rd
deleted file mode 100644
index 9b4569fe..00000000
--- a/man/tabyl.Rd
+++ /dev/null
@@ -1,70 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/tabyl.R
-\name{tabyl}
-\alias{tabyl}
-\alias{tabyl.default}
-\alias{tabyl.data.frame}
-\title{Generate a frequency table (1-, 2-, or 3-way).}
-\usage{
-tabyl(dat, ...)
-
-\method{tabyl}{default}(dat, show_na = TRUE, show_missing_levels = TRUE, ...)
-
-\method{tabyl}{data.frame}(dat, var1, var2, var3, show_na = TRUE, show_missing_levels = TRUE, ...)
-}
-\arguments{
-\item{dat}{A \code{data.frame} containing the variables you wish to count.
-Or, a vector you want to tabulate.}
-
-\item{...}{Additional arguments passed to methods.}
-
-\item{show_na}{Should counts of \code{NA} values be displayed? In a one-way tabyl,
-the presence of \code{NA} values triggers an additional column showing valid percentages
-(calculated excluding \code{NA} values).}
-
-\item{show_missing_levels}{Should counts of missing levels of factors be displayed?
-These will be rows and/or columns of zeroes. Useful for keeping consistent
-output dimensions even when certain factor levels may not be present in the data.}
-
-\item{var1}{The column name of the first variable.}
-
-\item{var2}{(optional) the column name of the second variable
-(its values become the column names in a 2-way tabulation).}
-
-\item{var3}{(optional) the column name of the third variable
-(a 3-way tabulation is split into a list on its values).}
-}
-\value{
-A \code{data.frame} with frequencies and percentages of the tabulated variable(s).
-A 3-way tabulation returns a list of data frames.
-}
-\description{
-A fully-featured alternative to \code{table()}. Results are data.frames and can be
-formatted and enhanced with janitor's family of \code{adorn_} functions.
-
-Specify a \code{data.frame} and the one, two, or three unquoted column names you
-want to tabulate. Three variables generates a list of 2-way tabyls,
-split by the third variable.
-
-Alternatively, you can tabulate a single variable that isn't in a \code{data.frame}
-by calling \code{tabyl()} on a vector, e.g., \code{tabyl(mtcars$gear)}.
-}
-\examples{
-
-tabyl(mtcars, cyl)
-tabyl(mtcars, cyl, gear)
-tabyl(mtcars, cyl, gear, am)
-
-# or using the \%>\% pipe
-mtcars \%>\%
- tabyl(cyl, gear)
-
-# illustrating show_na functionality:
-my_cars <- rbind(mtcars, rep(NA, 11))
-my_cars \%>\% tabyl(cyl)
-my_cars \%>\% tabyl(cyl, show_na = FALSE)
-
-# Calling on a single vector not in a data.frame:
-val <- c("hi", "med", "med", "lo")
-tabyl(val)
-}
diff --git a/man/top_levels.Rd b/man/top_levels.Rd
deleted file mode 100644
index 821c02c9..00000000
--- a/man/top_levels.Rd
+++ /dev/null
@@ -1,28 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/top_levels.R
-\name{top_levels}
-\alias{top_levels}
-\title{Generate a frequency table of a factor grouped into top-n, bottom-n, and all
-other levels.}
-\usage{
-top_levels(input_vec, n = 2, show_na = FALSE)
-}
-\arguments{
-\item{input_vec}{The factor variable to tabulate.}
-
-\item{n}{Number of levels to include in top and bottom groups}
-
-\item{show_na}{Should cases where the variable is \code{NA} be shown?}
-}
-\value{
-A \code{data.frame} (actually a \code{tbl_df}) with the frequencies of the
-grouped, tabulated variable. Includes counts and percentages, and valid
-percentages (calculated omitting \code{NA} values, if present in the vector and
-\code{show_na = TRUE}.)
-}
-\description{
-Get a frequency table of a factor variable, grouped into categories by level.
-}
-\examples{
-top_levels(as.factor(mtcars$hp), 2)
-}
diff --git a/man/untabyl.Rd b/man/untabyl.Rd
deleted file mode 100644
index aa8da644..00000000
--- a/man/untabyl.Rd
+++ /dev/null
@@ -1,24 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/as_and_untabyl.R
-\name{untabyl}
-\alias{untabyl}
-\title{Remove \code{tabyl} attributes from a data.frame.}
-\usage{
-untabyl(dat)
-}
-\arguments{
-\item{dat}{a \code{data.frame} of class \code{tabyl}.}
-}
-\value{
-the same \code{data.frame}, but without the \code{tabyl} class and attributes.
-}
-\description{
-Strips away all \code{tabyl}-related attributes from a data.frame.
-}
-\examples{
-
-mtcars \%>\%
- tabyl(am) \%>\%
- untabyl() \%>\%
- attributes() # tabyl-specific attributes are gone
-}
diff --git a/tests/testthat/test-adorn-ns.R b/tests/testthat/test-adorn-ns.R
deleted file mode 100644
index ebc28770..00000000
--- a/tests/testthat/test-adorn-ns.R
+++ /dev/null
@@ -1,244 +0,0 @@
-source_an <- tibble::tibble(
- x = c(rep("a", 500), "b", "b", "c", "d"),
- y = rep(c(0, 0, 0, 0, 0, 1), 84)
-) %>%
- tabyl(x, y)
-
-
-test_that("spacing is correct", {
- expect_equal(
- source_an %>%
- adorn_totals() %>%
- adorn_percentages("all") %>%
- adorn_pct_formatting() %>%
- adorn_ns() %>%
- untabyl(),
- data.frame(
- x = c(letters[1:4], "Total"),
- `0` = c("82.7% (417)", "0.4% (2)", "0.2% (1)", "0.0% (0)", "83.3% (420)"),
- `1` = c("16.5% (83)", "0.0% (0)", "0.0% (0)", "0.2% (1)", "16.7% (84)"),
- check.names = FALSE,
- stringsAsFactors = FALSE
- )
- )
-})
-
-test_that("front parameter works", {
- expect_equal(
- source_an %>%
- adorn_totals() %>%
- adorn_percentages("all") %>%
- adorn_pct_formatting() %>%
- adorn_ns("front") %>%
- untabyl(),
- data.frame(
- x = c(letters[1:4], "Total"),
- `0` = c("417 (82.7%)", "2 (0.4%)", "1 (0.2%)", "0 (0.0%)", "420 (83.3%)"),
- `1` = c("83 (16.5%)", "0 (0.0%)", "0 (0.0%)", "1 (0.2%)", "84 (16.7%)"),
- check.names = FALSE,
- stringsAsFactors = FALSE
- )
- )
-})
-
-test_that("bad inputs are caught", {
- expect_error(mtcars %>% adorn_ns(),
- "argument \"ns\" cannot be null; if not calling adorn_ns() on a data.frame of class \"tabyl\", pass your own value for ns",
- fixed = TRUE
- )
- expect_error(
- mtcars %>% tabyl(am, cyl) %>% adorn_ns("huh"),
- "`position` must be one of \"front\" or \"rear\", not \"huh\""
- )
- expect_error(
- mtcars %>% tabyl(am, cyl) %>% adorn_ns(ns = mtcars$mpg),
- "if supplying a value to the ns argument, it must be of class data.frame"
- )
- reg_df <- mtcars %>% tabyl(am, cyl)
- wide_df <- mtcars %>% tabyl(am, cyl)
- wide_df$extra <- c(10, 20)
- expect_error(
- adorn_ns(reg_df, ns = wide_df),
- "if supplying your own data.frame of Ns to append, its dimensions must match those of the data.frame in the \"dat\" argument"
- )
- expect_warning(mtcars %>% tabyl(cyl) %>% adorn_ns(),
- "adorn_ns() is meant to be called on a two_way tabyl; consider combining columns of a one_way tabyl with tidyr::unite()",
- fixed = TRUE
- )
-})
-
-test_that("attributes make it through unaltered", {
- expect_equal(
- attributes(
- source_an %>%
- adorn_totals() %>%
- adorn_percentages("all") %>%
- adorn_pct_formatting() %>%
- adorn_ns("front") # with adorn_ns
- ),
- attributes(
- source_an %>%
- adorn_totals() %>%
- adorn_percentages("all") %>%
- adorn_pct_formatting() # without adorn_ns
- )
- )
-})
-
-test_that("works on smallest tabyls", {
- expect_equal(
- mtcars %>%
- dplyr::slice(1) %>%
- tabyl(am, cyl) %>%
- dplyr::rename(new_var_name = `6`) %>%
- adorn_percentages() %>%
- adorn_pct_formatting() %>%
- adorn_ns() %>%
- untabyl(),
- data.frame(
- am = 1,
- new_var_name = "100.0% (1)", stringsAsFactors = FALSE
- )
- )
-})
-
-
-test_that("users can supply own Ns", {
- # make tabyl with thousands, convert to Ks to append
- big_tabyl <- data.frame(
- a = rep(c("x", rep("y", 9)), 999),
- b = rep(c("big", "big", "big", "small", "small"), 1998),
- stringsAsFactors = FALSE
- ) %>%
- tabyl(a, b)
-
- custom_Ns <- big_tabyl %>%
- dplyr::mutate(
- big = paste0(round(big / 1000, 1), "k"),
- small = paste0(round(small / 1000, 1), "k")
- )
-
- expect_equal(
- big_tabyl %>%
- adorn_percentages("col") %>%
- adorn_pct_formatting() %>%
- adorn_ns(ns = custom_Ns) %>%
- untabyl(),
- data.frame(
- a = c("x", "y"),
- big = c("16.7% (1k)", "83.3% (5k)"),
- small = c("0.0% (0k)", "100.0% (4k)"),
- stringsAsFactors = FALSE
- )
- )
-})
-
-test_that("automatically invokes purrr::map when called on a 3-way tabyl", {
- three <- tabyl(mtcars, cyl, am, gear) %>%
- adorn_percentages() %>%
- adorn_pct_formatting()
- expect_equal(
- adorn_ns(three), # vanilla call
- purrr::map(three, adorn_ns)
- )
-
- # with arguments passing through
- expect_equal(
- adorn_ns(three, "front"),
- purrr::map(three, adorn_ns, "front")
- )
-})
-
-test_that("non-data.frame inputs are handled", {
- expect_error(adorn_ns(1:5), "adorn_ns() must be called on a data.frame or list of data.frames", fixed = TRUE)
-})
-
-test_that("multiple character columns in a tabyl are left untouched", {
- small_with_char <- data.frame(
- x = letters[1:2],
- a = 1:2,
- b = 3:4,
- text = "text",
- stringsAsFactors = FALSE
- )
- expect_equal(
- small_with_char %>%
- adorn_percentages() %>%
- dplyr::pull(text),
- c("text", "text")
- )
-})
-
-test_that("works with tidyselect", {
- simple_percs <- source_an %>% adorn_percentages()
- one_adorned <- simple_percs %>% adorn_ns(, , , `1`)
- expect_equal(
- simple_percs[, 1:2],
- one_adorned[, 1:2]
- )
- expect_equal(
- one_adorned[[3]],
- c("0.166 (83)", "0.000 (0)", "0.000 (0)", "1.000 (1)")
- )
-})
-
-test_that("no message thrown on grouped df input", {
- expect_silent(source_an %>%
- adorn_percentages() %>%
- adorn_ns())
-})
-
-test_that("adorn_ns works on single column data.frame with custom Ns if tidyselect is used, #456", {
- adorned_single <- mtcars %>%
- tabyl(am, cyl) %>%
- adorn_percentages()
- adorned_single <-
- adorned_single %>%
- dplyr::select(a = `4`) %>%
- adorn_ns(ns = dplyr::select(attr(adorned_single, "core"), a = `4`), , , , a)
- expect_equal(stringr::str_sub(adorned_single$a, -4, -1), c(" (3)", " (8)"))
-})
-
-# This tests the display of the decimal.mark by forcing a decimal into a tabyl
-# Can't happen with a natural table, but maybe someone will use adorn_ns on a homespun data.frame
-test_that("formatting function works, (#444)", {
- set.seed(1)
- bigger_dat <- data.frame(
- sex = rep(c("m", "f"), 3000),
- age = round(runif(3000, 1, 102), 0)
- )
- bigger_dat$age_group <- cut(bigger_dat$age, quantile(bigger_dat$age, c(0, 1 / 3, 2 / 3, 1)))
-
- bigger_tab <- bigger_dat %>%
- tabyl(age_group, sex, show_missing_levels = FALSE)
-
- standard_output <- bigger_tab %>%
- adorn_percentages("col") %>%
- adorn_pct_formatting(digits = 1) %>%
- adorn_ns(position = "front")
-
- # test commas in thousands place by default
- expect_equal(
- standard_output$f,
- c("1,018 (33.9%)", "990 (33.0%)", "980 (32.7%)", "12 (0.4%)")
- )
-
- # Test decimal mark
- bigger_tab$f[1] <- 1018.5 # makes no sense in a tabyl but need for testing decimal mark display
-
- bigger_result <- bigger_tab %>%
- untabyl() %>% # to get the decimal into the core
- as_tabyl() %>%
- adorn_totals(c("row", "col")) %>%
- adorn_percentages("col") %>%
- adorn_pct_formatting(digits = 1) %>%
- adorn_ns(position = "rear", format_func = function(x) format(x, big.mark = ".", decimal.mark = ","))
-
- expect_equal(
- bigger_result$f,
- c(
- "33.9% (1.018,5)", "33.0% (990,0)", "32.7% (980,0)", "0.4% (12,0)",
- "100.0% (3.000,5)"
- )
- )
-})
diff --git a/tests/testthat/test-adorn-pct-formatting.R b/tests/testthat/test-adorn-pct-formatting.R
deleted file mode 100644
index 2bc461c9..00000000
--- a/tests/testthat/test-adorn-pct-formatting.R
+++ /dev/null
@@ -1,196 +0,0 @@
-source1 <- mtcars %>%
- tabyl(cyl, am) %>%
- adorn_percentages()
-
-test_that("calculations are accurate", {
- expect_equal(
- untabyl(adorn_pct_formatting(source1)), # default parameter is denom = "row"
- data.frame(
- cyl = c(4, 6, 8),
- `0` = c("27.3%", "57.1%", "85.7%"),
- `1` = c("72.7%", "42.9%", "14.3%"),
- check.names = FALSE,
- stringsAsFactors = FALSE
- )
- )
-})
-
-test_that("data.frames with no numeric columns beyond the first cause failure", {
- expect_error(
- adorn_pct_formatting(data.frame(a = 1:2, b = c("hi", "lo"))),
- "at least one targeted column must be of class numeric",
- fixed = TRUE
- )
-})
-
-dat <- data.frame(Operation = c("Login", "Posted", "Deleted"), `Total Count` = c(5, 25, 40), check.names = FALSE)
-
-test_that("works with a single numeric column per #89", {
- expect_equal(
- dat %>% adorn_percentages("col") %>% untabyl(),
- data.frame(
- Operation = c("Login", "Posted", "Deleted"),
- `Total Count` = c(5 / 70, 25 / 70, 40 / 70),
- check.names = FALSE
- )
- )
-})
-
-test_that("works with totals row", {
- expect_equal(
- dat %>% adorn_totals("row") %>% adorn_percentages("col") %>% untabyl(),
- data.frame(
- Operation = c("Login", "Posted", "Deleted", "Total"),
- `Total Count` = c(5 / 70, 25 / 70, 40 / 70, 1),
- check.names = FALSE, stringsAsFactors = FALSE
- )
- )
-})
-
-test_that("works with one-way tabyl", {
- expect_equal(
- mtcars %>%
- tabyl(carb) %>%
- adorn_pct_formatting(digits = 0) %>%
- untabyl(),
- data.frame(
- carb = c(1:4, 6, 8),
- n = c(7, 10, 3, 10, 1, 1),
- percent = c("22%", "31%", "9%", "31%", "3%", "3%"),
- stringsAsFactors = FALSE
- )
- )
-})
-
-test_that("NAs are replaced with dashes when percentage signs are affixed", {
- # NaNs from adorn_percentages, the more common case (still uncommon)
- has_nans <- mtcars %>%
- tabyl(carb, cyl) %>%
- .[5:6, ] %>%
- adorn_percentages("col") %>%
- adorn_pct_formatting() %>%
- untabyl()
- row.names(has_nans) <- NULL
- expect_equal(
- has_nans,
- data.frame(
- carb = c(6, 8),
- `4` = c("-", "-"),
- `6` = c("100.0%", "0.0%"),
- `8` = c("0.0%", "100.0%"),
- check.names = FALSE,
- stringsAsFactors = FALSE
- )
- )
-
- # NAs convert to -
- has_nas <- data.frame(a = c("big", "little"), x = c(0.1, 0.123), y = c(0.98, NA), stringsAsFactors = FALSE)
- expect_equal(
- adorn_pct_formatting(has_nas),
- data.frame(a = c("big", "little"), x = c("10.0%", "12.3%"), y = c("98.0%", "-"), stringsAsFactors = FALSE)
- )
-})
-
-test_that("NAs are replaced with dashes - no percentage signs affixed", {
- # NaNs from adorn_percentages, the more common case (still uncommon)
- has_nans <- mtcars %>%
- tabyl(carb, cyl) %>%
- .[5:6, ] %>%
- adorn_percentages("col") %>%
- adorn_pct_formatting(affix_sign = FALSE) %>%
- untabyl()
- row.names(has_nans) <- NULL
- expect_equal(
- has_nans,
- data.frame(
- carb = c(6, 8),
- `4` = c("-", "-"),
- `6` = c("100.0", "0.0"),
- `8` = c("0.0", "100.0"),
- check.names = FALSE,
- stringsAsFactors = FALSE
- )
- )
-
- # NAs convert to - symbol
- has_nas <- data.frame(a = c("big", "little"), x = c(0.1, 0.123), y = c(0.98, NA), stringsAsFactors = FALSE)
- expect_equal(
- adorn_pct_formatting(has_nas, affix_sign = FALSE),
- data.frame(a = c("big", "little"), x = c("10.0", "12.3"), y = c("98.0", "-"), stringsAsFactors = FALSE)
- )
-})
-
-
-test_that("bad rounding argument caught", {
- expect_error(
- dat %>%
- adorn_percentages() %>%
- adorn_pct_formatting(rounding = "blargh"),
- "`rounding` must be one of \"half to even\" or \"half up\", not \"blargh\".",
- fixed = TRUE
- )
-})
-
-test_that("automatically invokes purrr::map when called on a 3-way tabyl", {
- three <- tabyl(mtcars, cyl, am, gear)
- expect_equal(
- adorn_pct_formatting(three), # vanilla call
- purrr::map(three, adorn_pct_formatting)
- )
-
- # with arguments passing through
- expect_equal(
- adorn_pct_formatting(three, 2, "half up", affix_sign = FALSE),
- purrr::map(three, adorn_pct_formatting, 2, "half up", FALSE)
- )
-})
-
-test_that("non-data.frame inputs are handled", {
- expect_error(adorn_pct_formatting(1:5), "adorn_pct_formatting() must be called on a data.frame or list of data.frames", fixed = TRUE)
-})
-
-test_that("tidyselecting works", {
- target <- data.frame(
- color = c("green", "blue", "red"),
- first_wave = c(1:3),
- second_wave = c(4:6),
- third_wave = c(3, 3, 3),
- size = c("small", "medium", "large"),
- stringsAsFactors = FALSE
- ) %>%
- adorn_percentages()
-
- two_cols <- target %>%
- adorn_pct_formatting(, , , first_wave:second_wave)
- expect_equal(two_cols$first_wave, c("12.5%", "20.0%", "25.0%"))
- expect_equal(two_cols$third_wave, c(3 / 8, 3 / 10, 3 / 12))
-
- expect_message(
- target %>%
- adorn_pct_formatting(, , , third_wave:size),
- "At least one non-numeric column was specified and will not be modified."
- )
- # correct behavior occurs when text columns are skipped
- expect_message(
- text_skipped <- target %>%
- adorn_pct_formatting(., , , , c(first_wave, size)),
- "At least one non-numeric column was specified and will not be modified."
- )
-
- expect_equal(text_skipped$first_wave, c("12.5%", "20.0%", "25.0%"))
- expect_equal(
- text_skipped %>% dplyr::select(-first_wave),
- target %>% dplyr::select(-first_wave),
- ignore_attr = TRUE
- )
-})
-
-test_that("decimal.mark works", {
- locale_decimal_sep <- getOption("OutDec") # not sure if it's necessary to save and restore this,
- # but seems safe for locale-independent testing processes
- options(OutDec = ",")
- expect_true(
- all(grepl(",", unlist(adorn_pct_formatting(source1)[2])))
- )
- options(OutDec = locale_decimal_sep)
-})
diff --git a/tests/testthat/test-adorn-percentages.R b/tests/testthat/test-adorn-percentages.R
deleted file mode 100644
index 6e3caf0e..00000000
--- a/tests/testthat/test-adorn-percentages.R
+++ /dev/null
@@ -1,242 +0,0 @@
-source1 <- mtcars %>%
- tabyl(cyl, am)
-
-test_that("bad input to denominator arg is caught", {
- expect_error(
- mtcars %>%
- adorn_percentages("blargh"),
- "`denominator` must be one of \"row\", \"col\", or \"all\"",
- fixed = TRUE
- )
-})
-
-test_that("calculations are accurate", {
- expect_equal(
- untabyl(adorn_percentages(source1)), # default parameter is denominator = "row"
- data.frame(
- cyl = c(4, 6, 8),
- `0` = c(3 / 11, 4 / 7, 12 / 14),
- `1` = c(8 / 11, 3 / 7, 2 / 14),
- check.names = FALSE,
- stringsAsFactors = FALSE
- )
- )
- expect_equal(
- untabyl(adorn_percentages(source1, denominator = "col")),
- data.frame(
- cyl = c(4, 6, 8),
- `0` = c(3 / 19, 4 / 19, 12 / 19),
- `1` = c(8 / 13, 3 / 13, 2 / 13),
- check.names = FALSE,
- stringsAsFactors = FALSE
- )
- )
- expect_equal(
- untabyl(adorn_percentages(source1, denominator = "all")),
- data.frame(
- cyl = c(4, 6, 8),
- `0` = c(3 / 32, 4 / 32, 12 / 32),
- `1` = c(8 / 32, 3 / 32, 2 / 32),
- check.names = FALSE,
- stringsAsFactors = FALSE
- )
- )
-})
-
-source2 <- source1 %>%
- adorn_totals(c("row", "col"))
-test_that("calculations are correct when totals row/col doesn't match axis of computation", {
- expect_equal(
- untabyl(adorn_percentages(source2, denominator = "row")),
- data.frame(
- cyl = c(4, 6, 8, "Total"),
- `0` = c(3 / 11, 4 / 7, 12 / 14, 19 / 32),
- `1` = c(8 / 11, 3 / 7, 2 / 14, 13 / 32),
- Total = c(1, 1, 1, 1),
- check.names = FALSE,
- stringsAsFactors = FALSE
- )
- )
-})
-
-test_that("works with totals row/col when denominator = col or all, #357", {
- col_percs <- source1 %>%
- adorn_totals(where = c("col", "row")) %>%
- adorn_percentages(denominator = "col")
- expect_equal(col_percs$Total, c(11, 7, 14, 32) / 32)
- expect_equal(unname(unlist(col_percs[4, ])), c("Total", rep(1, 3)))
-
- # Same but for denominator = all
- all_percs <- source1 %>%
- adorn_totals(where = c("col", "row")) %>%
- adorn_percentages(denominator = "all")
- expect_equal(all_percs$Total, c(11, 7, 14, 32) / 32)
- expect_equal(unname(unlist(all_percs[4, ])), unname(c("Total", colSums(source1)[2:3] / 32, 32 / 32)))
-
- # Now with no totals row, same two tests as preceding
- col_percs_no_row <- source1 %>%
- adorn_totals(where = c("col")) %>%
- adorn_percentages(denominator = "col")
- expect_equal(col_percs_no_row$Total, c(11, 7, 14) / 32)
-
- # Same but for denominator = all
- all_percs_no_row <- source1 %>%
- adorn_totals(where = c("col")) %>%
- adorn_percentages(denominator = "all")
- expect_equal(all_percs_no_row$Total, c(11, 7, 14) / 32)
-
- # And try one where we exempt the totals col
- expect_message(
- col_percs_exempted <- source1 %>%
- adorn_totals(where = c("col", "row")) %>%
- adorn_percentages(denominator = "col", , -Total),
- regexp = "At least one non-numeric column was specified. All non-numeric columns will be removed from percentage calculations."
- )
- expect_equal(col_percs_exempted$Total, c(11, 7, 14, 32))
- expect_equal(unname(unlist(col_percs_exempted[4, ])), c("Total", 1, 1, 32))
-
- expect_message(
- all_percs_exempted <- source1 %>%
- adorn_totals(where = c("col", "row")) %>%
- adorn_percentages(denominator = "all", , -Total),
- regexp = "At least one non-numeric column was specified. All non-numeric columns will be removed from percentage calculations."
- )
- expect_equal(all_percs_exempted$Total, c(11, 7, 14, 32))
- expect_equal(unname(unlist(all_percs_exempted[4, ])), unname(c("Total", colSums(source1)[2:3] / 32, 32)))
-})
-
-source2 <- source1
-source2[2, 2] <- NA
-
-test_that("NAs handled correctly with na.rm = TRUE", {
- expect_equal(
- untabyl(adorn_percentages(source2)), # row
- data.frame(
- cyl = c(4, 6, 8),
- `0` = c(3 / 11, NA, 12 / 14),
- `1` = c(8 / 11, 1, 2 / 14),
- check.names = FALSE,
- stringsAsFactors = FALSE
- )
- )
- expect_equal(
- untabyl(adorn_percentages(source2, denominator = "col")),
- data.frame(
- cyl = c(4, 6, 8),
- `0` = c(3 / 15, NA, 12 / 15),
- `1` = c(8 / 13, 3 / 13, 2 / 13),
- check.names = FALSE,
- stringsAsFactors = FALSE
- )
- )
-})
-
-test_that("NAs handled correctly with na.rm = FALSE", {
- expect_equal(
- untabyl(adorn_percentages(source2, na.rm = FALSE)), # row
- data.frame(
- cyl = c(4, 6, 8),
- `0` = c(3 / 11, NA, 12 / 14),
- `1` = c(8 / 11, NA, 2 / 14),
- check.names = FALSE,
- stringsAsFactors = FALSE
- )
- )
- expect_equal(
- untabyl(adorn_percentages(source2, denominator = "col", na.rm = FALSE)),
- data.frame(
- cyl = c(4, 6, 8),
- `0` = as.numeric(c(NA, NA, NA)),
- `1` = c(8 / 13, 3 / 13, 2 / 13),
- check.names = FALSE,
- stringsAsFactors = FALSE
- )
- )
-})
-
-test_that("data.frames with no numeric columns beyond the first cause failure", {
- expect_error(
- adorn_percentages(data.frame(a = 1:2, b = c("hi", "lo"))),
- "at least one one of columns 2:n must be of class numeric"
- )
-})
-
-test_that("works with a single numeric column per #89", {
- dat <- data.frame(Operation = c("Login", "Posted", "Deleted"), `Total Count` = c(5, 25, 40), check.names = FALSE)
- expect_equal(
- dat %>% adorn_percentages("col") %>% untabyl(),
- data.frame(
- Operation = c("Login", "Posted", "Deleted"),
- `Total Count` = c(5 / 70, 25 / 70, 40 / 70),
- check.names = FALSE
- )
- )
-})
-
-test_that("works with totals row", {
- dat <- data.frame(Operation = c("Login", "Posted", "Deleted"), `Total Count` = c(5, 25, 40), check.names = FALSE)
- expect_equal(
- dat %>% adorn_totals("row") %>% adorn_percentages("col") %>% untabyl(),
- data.frame(
- Operation = c("Login", "Posted", "Deleted", "Total"),
- `Total Count` = c(5 / 70, 25 / 70, 40 / 70, 1),
- check.names = FALSE, stringsAsFactors = FALSE
- )
- )
-})
-
-test_that("automatically invokes purrr::map when called on a 3-way tabyl", {
- three <- tabyl(mtcars, cyl, am, gear)
- expect_equal(
- adorn_percentages(three), # vanilla call
- purrr::map(three, adorn_percentages)
- )
-
- # with arguments passing through
- expect_equal(
- adorn_percentages(three, "col", na.rm = FALSE),
- purrr::map(three, adorn_percentages, "col", FALSE)
- )
-})
-
-test_that("non-data.frame inputs are handled", {
- expect_error(adorn_percentages(1:5), "adorn_percentages() must be called on a data.frame or list of data.frames", fixed = TRUE)
-})
-
-test_that("tidyselecting works", {
- target <- data.frame(
- color = c("green", "blue", "red"),
- first_wave = c(1:3),
- second_wave = c(4:6),
- third_wave = c(3, 3, 3),
- size = c("small", "medium", "large"),
- stringsAsFactors = FALSE
- )
- two_cols <- target %>%
- adorn_percentages(, , , first_wave:second_wave)
- expect_equal(two_cols$first_wave, c(1 / 5, 2 / 7, 3 / 9))
- expect_equal(two_cols$third_wave, rep(3, 3))
-
- expect_message(
- target %>%
- adorn_percentages(., "col", , c(first_wave, size)),
- "At least one non-numeric column was specified. All non-numeric columns will be removed from percentage calculations."
- )
- expect_message(
- text_skipped <- target %>%
- adorn_percentages(., "col", , c(first_wave, size)),
- regexp = "At least one non-numeric column was specified. All non-numeric columns will be removed from percentage calculations."
- )
- expect_equal(text_skipped$first_wave, target$first_wave / sum(target$first_wave))
- expect_equal(
- text_skipped %>% dplyr::select(-first_wave),
- target %>% dplyr::select(-first_wave),
- ignore_attr = TRUE
- )
-
- # Check combination of totals and tidyselecting does not modify totals col
- totaled <- target %>%
- adorn_totals("col", , , , second_wave:third_wave) %>%
- adorn_percentages(, , , second_wave:third_wave)
- expect_equal(totaled$Total, 7:9)
-})
diff --git a/tests/testthat/test-adorn-rounding.R b/tests/testthat/test-adorn-rounding.R
deleted file mode 100644
index b98a025e..00000000
--- a/tests/testthat/test-adorn-rounding.R
+++ /dev/null
@@ -1,120 +0,0 @@
-x <- data.frame(
- a = c(rep("x", 55), rep("y", 45)),
- b = c(rep("x", 50), rep("y", 50)),
- stringsAsFactors = FALSE
-)
-
-# Crosstab with decimal values ending in .5
-y <- x %>%
- tabyl(a, b) %>%
- adorn_percentages("all")
-
-test_that("rounding parameter works", {
- expect_equal(
- y %>%
- adorn_rounding(digits = 1, rounding = "half up") %>%
- untabyl(),
- data.frame(
- a = c("x", "y"),
- x = c(0.5, 0.0),
- y = c(0.1, 0.5),
- stringsAsFactors = FALSE
- )
- )
- # Test failing on CRAN and only there
- skip_on_cran()
- expect_equal(
- y %>%
- adorn_rounding(digits = 1) %>% # default rounding: "half to even"
- untabyl(),
- data.frame(
- a = c("x", "y"),
- x = c(0.5, 0.0),
- y = c(0.0, 0.4),
- stringsAsFactors = FALSE
- )
- )
-})
-
-test_that("digit control succeeds", {
- expect_equal(
- y %>%
- adorn_rounding(digits = 0, rounding = "half up") %>%
- untabyl(),
- data.frame(
- a = c("x", "y"),
- x = c(1, 0),
- y = c(0, 0),
- stringsAsFactors = FALSE
- )
- )
- expect_equal(
- y %>%
- adorn_rounding(digits = 2, rounding = "half up"), # shouldn't do anything given the input only having 2 decimal places
- y
- )
-})
-
-test_that("bad rounding argument caught", {
- expect_error(
- y %>%
- adorn_rounding(rounding = "blargh"),
- "'rounding' must be one of 'half to even' or 'half up'",
- fixed = TRUE
- )
-})
-
-test_that("works when called on a 3-way tabyl", {
- triple <- mtcars %>%
- tabyl(am, cyl, vs) %>%
- adorn_percentages("row")
-
- triple_rounded_manual <- triple
- triple_rounded_manual[[1]] <- adorn_rounding(triple[[1]])
- triple_rounded_manual[[2]] <- adorn_rounding(triple[[2]])
-
- expect_equal(
- triple %>%
- adorn_rounding(),
- triple_rounded_manual
- )
-})
-
-
-test_that("tidyselecting works", {
- target <- data.frame(
- color = c("green", "blue", "red"),
- first_wave = c(1:3),
- second_wave = c(4:6),
- third_wave = c(3, 3, 3),
- size = c("small", "medium", "large"),
- stringsAsFactors = FALSE
- ) %>%
- adorn_percentages()
-
- two_cols <- target %>%
- adorn_rounding(, "half up", first_wave:second_wave)
- expect_equal(two_cols$first_wave, c(.1, .2, .3))
- expect_equal(two_cols$third_wave, c(3 / 8, 3 / 10, 3 / 12))
-
- expect_message(
- target %>%
- adorn_rounding(, , third_wave:size),
- "At least one non-numeric column was specified and will not be modified."
- )
- expect_message(
- text_skipped <- target %>%
- adorn_rounding(, , c(first_wave, size)),
- "At least one non-numeric column was specified and will not be modified."
- )
- expect_equal(text_skipped$first_wave, c(.1, .2, .2))
- expect_equal(
- text_skipped %>% dplyr::select(-first_wave),
- target %>% dplyr::select(-first_wave),
- ignore_attr = TRUE
- )
-})
-
-test_that("non-data.frame inputs are handled", {
- expect_error(adorn_rounding(1:5), "adorn_rounding() must be called on a data.frame or list of data.frames", fixed = TRUE)
-})
diff --git a/tests/testthat/test-adorn-title.R b/tests/testthat/test-adorn-title.R
deleted file mode 100644
index 6dab98bb..00000000
--- a/tests/testthat/test-adorn-title.R
+++ /dev/null
@@ -1,165 +0,0 @@
-source1 <- mtcars %>%
- tabyl(gear, cyl)
-
-test_that("placement is correct", {
- # Top
- expect_equal(
- source1 %>%
- adorn_title() %>%
- names(),
- c("", "cyl", rep("", 2))
- )
- expect_equal(
- source1 %>%
- adorn_title() %>%
- .[1, ] %>%
- unlist() %>%
- unname(),
- c("gear", "4", "6", "8")
- )
- # Combined
- expect_equal(
- source1 %>%
- adorn_title("combined") %>%
- names(),
- c("gear/cyl", "4", "6", "8")
- )
-})
-
-test_that("name overrides work", {
- expect_equal(
- source1 %>%
- adorn_title(row_name = "R", col_name = "C") %>%
- names(),
- c("", "C", rep("", 2))
- )
-})
-
-test_that("non-tabyls are treated correctly", {
- non_tab <- mtcars %>%
- dplyr::count(gear, cyl) %>%
- tidyr::pivot_wider(names_from = gear, values_from = n)
- expect_error(adorn_title(non_tab), "When input is not a data.frame of class tabyl, a value must be specified for the col_name argument")
-
- expect_equal(
- non_tab %>% adorn_title(col_name = "col") %>% names(),
- c("", "col", rep("", 2))
- )
-
- expect_equal(
- non_tab %>% adorn_title(placement = "combined", col_name = "col") %>% names(),
- c("cyl/col", 3, 4, 5)
- )
-
- expect_equal(
- non_tab %>% adorn_title(placement = "combined", row_name = "row!", col_name = "col") %>% names(),
- c("row!/col", 3, 4, 5)
- )
-})
-test_that("bad inputs are caught", {
- expect_error(adorn_title(1:2),
- "\"dat\" must be a data.frame",
- fixed = TRUE
- )
- expect_error(
- adorn_title(source1,
- placement = "blargh"
- ),
- "`placement` must be one of \"top\" or \"combined\"",
- fixed = TRUE
- )
- expect_error(
- adorn_title(source1,
- row_name = 1:4
- ),
- "row_name must be a string"
- )
- expect_error(
- adorn_title(source1,
- col_name = mtcars
- ),
- "col_name must be a string"
- )
-
- # Doesn't make sense with a one-way tabyl
- expect_warning(
- mtcars %>% tabyl(cyl) %>% adorn_title(),
- "adorn_title is meant for two-way tabyls, calling it on a one-way tabyl may not yield a meaningful result"
- )
-})
-
-test_that("works with non-count inputs", {
- source2_base <- data.frame(sector = c("North", "South"), units = 1:2, group = c("a", "b"))
- source2_tibble <- dplyr::as_tibble(source2_base)
- expect_equal(
- adorn_title(source2_base, col_name = "Characteristics") %>% names(),
- c("", "Characteristics", "")
- )
- expect_equal(
- adorn_title(source2_base, col_name = "Characteristics"),
- adorn_title(source2_tibble, col_name = "Characteristics")
- )
-})
-
-test_that("for printing purposes: tabyl class stays tabyl, data.frame stays data.frame, tibble is downgraded to data.frame", {
- # right output classes with tabyl inputs
- expect_equal(class(mtcars %>% tabyl(cyl, am) %>% adorn_title()), c("tabyl", "data.frame"))
- expect_equal(class(mtcars %>% tabyl(gear, carb) %>% adorn_title(., "combined")), c("tabyl", "data.frame"))
-
- # Create tibble input:
- mpg_by_cyl_and_am <-
- mtcars %>%
- dplyr::group_by(cyl, am) %>%
- dplyr::summarise(mean_mpg = mean(mpg)) %>%
- tidyr::pivot_wider(names_from = am, values_from = mean_mpg)
-
- # handles tibble input
- expect_s3_class(
- mpg_by_cyl_and_am %>% adorn_title("top", "Cylinders", "Automatic?"),
- "data.frame"
- )
-
- # Convert columns 2:n to strings
- expect_s3_class(
- mpg_by_cyl_and_am %>% adorn_pct_formatting() %>% # nonsense command here, just want to convert cols 2:n into character
- adorn_title("top", "Cylinders", "Automatic?"),
- "data.frame"
- )
-
- # handles data.frame non-tabyl input
- expect_s3_class(
- mtcars %>% adorn_title("top", col_name = "hey look ma I'm a title"),
- "data.frame"
- )
-})
-
-test_that("works with factors in input", {
- facts <- data.frame(a = "high", large = "1", stringsAsFactors = TRUE)
- # first with "top" then "combined"
- expect_equal(
- facts %>% adorn_title(col_name = "col"),
- data.frame(a = c("a", "high"), col = c("large", "1"), stringsAsFactors = FALSE) %>%
- setNames(., c("", "col"))
- )
- # with combined the original column types are preserved
- expect_equal(
- facts %>% adorn_title("combined", col_name = "col"),
- data.frame(`a/col` = "high", large = "1", stringsAsFactors = TRUE, check.names = FALSE)
- )
-})
-
-test_that("automatically invokes purrr::map when called on a 3-way tabyl", {
- three <- tabyl(mtcars, cyl, am, gear) %>%
- adorn_percentages() %>%
- adorn_pct_formatting()
- expect_equal(
- adorn_title(three), # vanilla call
- purrr::map(three, adorn_title)
- )
-
- # with arguments passing through, incl. custom row and col names
- expect_equal(
- adorn_title(three, "combined", "cyl", "am"),
- purrr::map(three, adorn_title, "combined", "cyl", "am")
- )
-})
diff --git a/tests/testthat/test-adorn-totals.R b/tests/testthat/test-adorn-totals.R
deleted file mode 100644
index 045460be..00000000
--- a/tests/testthat/test-adorn-totals.R
+++ /dev/null
@@ -1,595 +0,0 @@
-dat <- data.frame(
- a = factor(c(rep(c("big", "small", "big"), 3)), levels = c("small", "big")),
- b = c(1:3, 1:3, 1, 1, 1)
-)
-ct <- dat %>%
- tabyl(a, b)
-
-mixed <- data.frame(
- a = 1:3,
- b = c("x", "y", "z"),
- c = 5:7,
- d = c("big", "med", "small"),
- stringsAsFactors = FALSE
-)
-
-test_that("totals row is correct", {
- expect_equal(
- untabyl(adorn_totals(ct, "row")),
- data.frame(
- a = factor(c("small", "big", "Total"), levels = c("small", "big", "Total")),
- `1` = c(1, 4, 5),
- `2` = c(2, 0, 2),
- `3` = c(0, 2, 2),
- check.names = FALSE
- )
- )
-})
-
-test_that("totals col is correct", {
- expect_equal(
- untabyl(adorn_totals(ct, "col")),
- data.frame(
- a = factor(c("small", "big"), levels = c("small", "big")),
- `1` = c(1, 4),
- `2` = c(2, 0),
- `3` = c(0, 2),
- Total = c(3, 6),
- check.names = FALSE
- )
- )
-})
-
-test_that("totals row and col produce correct results when called together", {
- expect_equal(
- ct %>%
- adorn_totals(c("row", "col")) %>%
- untabyl(),
- data.frame(
- a = factor(c("small", "big", "Total"), levels = c("small", "big", "Total")),
- `1` = c(1, 4, 5),
- `2` = c(2, 0, 2),
- `3` = c(0, 2, 2),
- Total = c(3, 6, 9),
- check.names = FALSE
- )
- )
-})
-
-test_that("totals where='both' produce equivalent results to c('row','col')", {
- expect_equal(
- ct %>%
- adorn_totals("both") %>%
- untabyl(),
- ct %>%
- adorn_totals(c("row", "col")) %>%
- untabyl()
- )
-})
-
-test_that("order doesn't matter when row and col are called together", {
- expect_equal(
- ct %>%
- adorn_totals(c("row", "col")) %>%
- untabyl(),
- ct %>%
- adorn_totals(c("col", "row")) %>%
- untabyl()
- )
-})
-
-test_that("both functions work with a single column", {
- single_col <- tibble::tibble(
- a = c(as.Date("2016-01-01"), as.Date("2016-02-03")),
- b = c(1, 2)
- )
- expect_error(single_col %>% adorn_totals("row"), NA) # this method of testing passage is from http://stackoverflow.com/a/30068233
- expect_error(single_col %>% adorn_totals("col"), NA)
- expect_error(single_col %>% adorn_totals(c("col", "row")), NA)
-})
-
-dat <- data.frame(
- a = c("hi", "lo"),
- b = c(1, 2),
- c = c(5, 10),
- d = c("big", "small"),
- e = c(20, NA),
- stringsAsFactors = FALSE
-)
-
-test_that("numeric first column is ignored", {
- expect_equal(
- mtcars %>%
- tabyl(cyl, gear) %>%
- adorn_totals("col") %>%
- untabyl(),
- data.frame(
- cyl = c(4, 6, 8),
- `3` = c(1, 2, 12),
- `4` = c(8, 4, 0),
- `5` = c(2, 1, 2),
- Total = c(11, 7, 14),
- check.names = FALSE,
- stringsAsFactors = FALSE
- )
- )
-})
-
-# create input tables for subsequent testing
-ct_2 <-
- mtcars %>%
- dplyr::group_by(cyl, gear) %>%
- dplyr::tally() %>%
- tidyr::pivot_wider(names_from = gear, values_from = n)
-df1 <- data.frame(x = c(1, 2), y = c(NA, 4))
-
-test_that("grouped_df gets ungrouped and succeeds", {
- ct_2 <-
- mtcars %>%
- dplyr::group_by(cyl, gear) %>%
- dplyr::tally() %>%
- tidyr::pivot_wider(names_from = gear, values_from = n)
- expect_equal(
- ct_2 %>% adorn_totals(),
- ct_2 %>% dplyr::ungroup() %>% adorn_totals()
- )
-})
-
-test_that("na.rm value works correctly", {
- expect_equal(
- df1 %>% adorn_totals(c("row", "col"), na.rm = FALSE) %>% untabyl(),
- data.frame(
- x = c("1", "2", "Total"),
- y = c(NA, 4, NA),
- Total = c(NA, 4, NA),
- stringsAsFactors = FALSE
- )
- )
-})
-
-test_that("add_totals respects if input was data.frame", {
- expect_equal(
- class(df1),
- class(df1 %>% adorn_totals() %>% untabyl())
- )
-})
-
-test_that("add_totals respects if input was tibble", {
- expect_equal(
- class(df1 %>% tibble::as_tibble()),
- class(df1 %>% tibble::as_tibble() %>% adorn_totals() %>% untabyl())
- )
-})
-
-test_that("error thrown if no columns past first are numeric", {
- df2 <- data.frame(
- x = c("big", "small"),
- y = c("hi", "lo")
- )
- expect_error(
- adorn_totals(df2, "col"),
- "at least one targeted column must be of class numeric. Control target variables with the ... argument. adorn_totals should be called before other adorn_ functions."
- )
- expect_error(
- mixed %>%
- adorn_totals("row", "-", TRUE, "Totals", d),
- "at least one targeted column must be of class numeric. Control target variables with the ... argument. adorn_totals should be called before other adorn_ functions."
- )
-
- # Add a test where only the first column is numeric
- df3 <- data.frame(
- x = 1:2,
- y = c("hi", "lo")
- )
- expect_error(
- adorn_totals(df3),
- "at least one targeted column must be of class numeric. Control target variables with the ... argument. adorn_totals should be called before other adorn_ functions."
- )
-})
-
-test_that("bad input to where arg is caught", {
- expect_error(
- mtcars %>%
- adorn_totals("blargh"),
- paste0('"where" must be one of "row", "col", or c("row", "col")'),
- fixed = TRUE
- )
-})
-
-
-test_that("works with non-numeric columns mixed in; fill character specification", {
- expect_equal(
- mixed %>% adorn_totals(where = c("row", "col"), fill = "*") %>% untabyl(),
- data.frame(
- a = c("1", "2", "3", "Total"),
- b = c("x", "y", "z", "*"),
- c = c(5, 6, 7, 18),
- d = c("big", "med", "small", "*"),
- Total = c(5, 6, 7, 18),
- stringsAsFactors = FALSE
- )
- )
-})
-
-test_that("fill works with multiple factor and date columns", {
- has_facs <- data.frame(
- a = c("hi", "low"),
- b = c("big", "small"),
- c = c(as.Date("2000-01-01"), as.Date("2000-01-02")),
- d = 1:2
- )
- expect_equal(
- adorn_totals(has_facs, "row") %>% untabyl(),
- data.frame(
- a = c("hi", "low", "Total"),
- b = c("big", "small", "-"),
- c = c("2000-01-01", "2000-01-02", "-"),
- d = 1:3,
- stringsAsFactors = FALSE
- )
- )
-})
-
-test_that("totals attributes are assigned correctly", {
- post <- adorn_totals(ct, c("row", "col"))
- expect_equal(attr(post, "totals"), c("row", "col"))
- expect_equal(class(post), c("tabyl", "data.frame"))
- expect_equal(attr(post, "tabyl_type"), "two_way")
- expect_equal(attr(post, "core"), untabyl(ct))
-
- post_col <- adorn_totals(ct, "col")
- expect_equal(attr(post_col, "totals"), "col")
- expect_equal(class(post_col), c("tabyl", "data.frame"))
- expect_equal(attr(post_col, "tabyl_type"), "two_way")
- expect_equal(attr(post_col, "core"), untabyl(ct))
-
- post_sequential_both <- adorn_totals(ct, "col") %>%
- adorn_totals("row")
- expect_equal(post_sequential_both, post, ignore_attr = TRUE)
- expect_equal(
- sort(attr(post, "totals")),
- sort(attr(post_sequential_both, "totals"))
- )
-})
-
-test_that("trying to re-adorn a dimension fails", {
- expect_error(
- ct %>% adorn_totals("col") %>% adorn_totals("col"),
- "trying to re-add a totals dimension that is already been added"
- )
- expect_error(
- ct %>% adorn_totals() %>% adorn_totals(),
- "trying to re-add a totals dimension that is already been added"
- )
-})
-
-test_that("automatically invokes purrr::map when called on a 3-way tabyl", {
- three <- tabyl(mtcars, cyl, am, gear)
- expect_equal(
- adorn_totals(three), # vanilla call
- purrr::map(three, adorn_totals)
- )
-
- # with arguments passing through
- expect_equal(
- adorn_totals(three, c("row", "col"), fill = "---", na.rm = FALSE, name = "dummy_name"),
- purrr::map(three, adorn_totals, c("row", "col"), fill = "---", FALSE, name = "dummy_name")
- )
-})
-
-test_that("non-data.frame inputs are handled", {
- expect_error(adorn_totals(1:5), "adorn_totals() must be called on a data.frame or list of data.frames", fixed = TRUE)
-})
-
-test_that("row total name is changed", {
- expect_equal(
- as.character(adorn_totals(ct, name = "NewTitle")[nrow(ct) + 1, 1]),
- "NewTitle"
- )
-})
-
-test_that("column total name is changed", {
- expect_equal(
- colnames(adorn_totals(ct, where = "col", name = "NewTitle"))[(ncol(ct) + 1)],
- "NewTitle"
- )
-})
-
-test_that("tidyselecting works", {
- cyl_gear <- mtcars %>%
- adorn_totals(c("row", "col"), "-", TRUE, "cylgear", c(cyl, gear))
- expect_equal(cyl_gear$cylgear, c(mtcars$cyl + mtcars$gear, (sum(mtcars$cyl) + sum(mtcars$gear))))
- expect_equal(
- unname(unlist(cyl_gear[33, ])),
- c("cylgear", "198", rep("-", 7), "118", "-", "316")
- )
-
- # Can override the first column not being included
- # adorn_totals() still fails if ONLY the first column is numeric, that's fine - it's a nonsensical operation
- simple <- data.frame(
- x = 1:2,
- y = 3:4,
- z = c("hi", "lo")
- )
-
- expect_message(
- simple %>%
- adorn_totals(c("row", "col"), "-", TRUE, "Total", x),
- "Because the first column was specified to be totaled, it does not contain the label 'Total' (or user-specified name) in the totals row",
- fixed = TRUE
- )
-
- expect_message(
- simple_total <- simple %>%
- adorn_totals(c("row", "col"), "-", TRUE, "Total", x),
- regexp = "Because the first column was specified to be totaled, it does not contain the label 'Total' (or user-specified name) in the totals row",
- fixed = TRUE
- )
-
- expect_equal(unname(unlist(simple_total[3, ])), c("3", "-", "-", "3"))
- expect_equal(simple_total$Total, 1:3)
-
- # test that leaving out a numeric column of a tibble succeeds, #388
- expect_equal(
- simple %>%
- adorn_totals(, , , , y) %>%
- as.data.frame(),
- simple %>%
- tibble::tibble() %>%
- adorn_totals() %>%
- as.data.frame()
- )
-})
-
-test_that("supplying NA to fill preserves column types", {
- test_df <- data.frame(
- a = c("hi", "low", "med"),
- b = factor(c("big", "small", "regular")),
- c = c(as.Date("2000-01-01"), as.Date("2000-01-02"), as.Date("2000-01-03")),
- d = c(as.POSIXct("2000-01-01", tz = "ROK"), as.POSIXct("2000-01-02"), as.POSIXct("2000-01-03")),
- e = 1:3,
- f = 4:6,
- g = c(TRUE, FALSE, TRUE),
- h = c(7.2, 8.2, 9.2),
- stringsAsFactors = FALSE
- )
-
- out <- adorn_totals(test_df, fill = NA)
-
- # expect types to be preserved
- expect_type(out[["a"]], "character")
- expect_s3_class(out[["b"]], "factor")
- expect_s3_class(out[["c"]], "Date")
- expect_s3_class(out[["d"]], "POSIXct")
- expect_type(out[["g"]], "logical")
- # expect factor levels to be preserved
- expect_equal(levels(out[["b"]]), levels(test_df[["b"]]))
- # expect NAs in total rows for non-numerics
- expect_true(is.na(out[4, "b"]))
- expect_true(is.na(out[4, "c"]))
- expect_true(is.na(out[4, "d"]))
- expect_true(is.na(out[4, "g"]))
- # test values of totals
- expect_equal(out[4, "a"], "Total")
- expect_equal(out[4, "e"], 6)
- expect_equal(out[4, "f"], 15)
- expect_equal(out[4, "h"], 24.6)
- # expect original df intact
- expect_equal(test_df, out[1:3, ], ignore_attr = TRUE)
-})
-
-test_that("supplying NA as fill still works with non-character first col and numeric non-totaled cols", {
- test_df <- data.frame(
- a = factor(c("hi", "low", "med"), levels = c("low", "med", "hi")),
- b = factor(c("big", "small", "regular")),
- c = c(as.Date("2000-01-01"), as.Date("2000-01-02"), as.Date("2000-01-03")),
- d = 1:3,
- e = 4:6,
- f = c(TRUE, FALSE, TRUE),
- g = c(7.2, 8.2, 9.2),
- stringsAsFactors = FALSE
- )
-
- out <- adorn_totals(test_df,
- where = "row",
- fill = NA,
- na.rm = TRUE,
- name = "Total",
- d, e
- )
-
- expect_equal(out[["a"]], factor(c("hi", "low", "med", "Total"), levels = c("low", "med", "hi", "Total")))
- expect_equal(out[["g"]], c(7.2, 8.2, 9.2, NA_real_))
- expect_equal(out[4, "d"], 6)
- expect_equal(out[4, "e"], 15)
- expect_equal(test_df[1:3, 2:7], out[1:3, 2:7], ignore_attr = TRUE)
-})
-
-test_that("one_way tabyl inputs retain that class", {
- expect_equal(
- attr(mtcars %>% tabyl(am) %>% adorn_totals("both"), "tabyl_type"),
- "one_way"
- )
-})
-
-
-# Tests from #413, different values for row and col names
-test_that("long vectors are trimmed", {
- expect_equal(
- mixed %>%
- adorn_totals(
- where = "row",
- name = c("total", "something_else"),
- fill = "-"
- ) %>%
- untabyl(),
- data.frame(
- a = c(as.character(1:3), "total"),
- b = c("x", "y", "z", "-"),
- c = c(5:7, 18),
- d = c("big", "med", "small", "-"),
- stringsAsFactors = FALSE
- )
- )
-})
-
-test_that("row and column names are taken correctly from a vector", {
- expect_equal(
- mixed %>%
- adorn_totals(
- where = "both",
- name = c("row_name", "col_name"),
- fill = "-"
- ) %>%
- untabyl(),
- data.frame(
- a = c(as.character(1:3), "row_name"),
- b = c("x", "y", "z", "-"),
- c = c(5, 6, 7, 18),
- d = c("big", "med", "small", "-"),
- col_name = c(5, 6, 7, 18),
- stringsAsFactors = FALSE
- )
- )
-})
-
-test_that("row and column names are taken correctly from a single name", {
- expect_equal(
- mixed %>%
- adorn_totals(
- where = "both",
- name = "totals",
- fill = "-"
- ) %>%
- untabyl(),
- data.frame(
- a = c(as.character(1:3), "totals"),
- b = c("x", "y", "z", "-"),
- c = c(5, 6, 7, 18),
- d = c("big", "med", "small", "-"),
- totals = c(5, 6, 7, 18),
- stringsAsFactors = FALSE
- )
- )
-})
-
-dat <- data.frame(
- a = c(rep(c("big", "small", "big"), 3)),
- b = c(1:3, 1:3, 1, 1, 1),
- stringsAsFactors = TRUE
-)
-ct <- dat %>%
- tabyl(a, b)
-
-mixed <- data.frame(
- a = 1:3,
- b = c("x", "y", "z"),
- c = 5:7,
- d = c("big", "med", "small"),
- stringsAsFactors = FALSE
-)
-
-test_that("long vectors are trimmed", {
- expect_equal(
- mixed %>%
- adorn_totals(
- where = "row",
- name = c("total", "row_total"),
- fill = "-"
- ) %>%
- untabyl(),
- data.frame(
- a = c(as.character(1:3), "total"),
- b = c("x", "y", "z", "-"),
- c = c(5:7, 18),
- d = c("big", "med", "small", "-"),
- stringsAsFactors = FALSE
- )
- )
-})
-
-test_that("row and column names are taken correctly from a vector", {
- expect_equal(
- mixed %>%
- adorn_totals(
- where = "both",
- name = c("column_totals", "row_totals"),
- fill = "-"
- ) %>%
- untabyl(),
- data.frame(
- a = c(as.character(1:3), "column_totals"),
- b = c("x", "y", "z", "-"),
- c = c(5, 6, 7, 18),
- d = c("big", "med", "small", "-"),
- row_totals = c(5, 6, 7, 18),
- stringsAsFactors = FALSE
- )
- )
-})
-
-test_that("row and column names are taken correctly from a single name", {
- expect_equal(
- mixed %>%
- adorn_totals(
- where = "both",
- name = "totals",
- fill = "-"
- ) %>%
- untabyl(),
- data.frame(
- a = c(as.character(1:3), "totals"),
- b = c("x", "y", "z", "-"),
- c = c(5, 6, 7, 18),
- d = c("big", "med", "small", "-"),
- totals = c(5, 6, 7, 18),
- stringsAsFactors = FALSE
- )
- )
-})
-
-test_that("order is maintained when first column is a factor, #494", {
- o <- data.frame(
- a = 1:5,
- fac = factor(c("orange", "blue", "orange", "orange", "blue")),
- ord = ordered(
- c("huge", "medium", "small", "medium", "medium"),
- levels = c("small", "medium", "huge")
- )
- )
-
- o_tabyl_totaled <- o %>%
- tabyl(ord, a) %>%
- adorn_totals("both")
-
- expect_equal(
- attr(o_tabyl_totaled$ord, "levels"),
- c("small", "medium", "huge", "Total")
- )
- expect_equal(
- class(o_tabyl_totaled$ord),
- c("ordered", "factor")
- )
- f_tabyl_totaled <- o %>%
- tabyl(fac, a) %>%
- adorn_totals("both")
-
- expect_equal(
- attr(f_tabyl_totaled$fac, "levels"),
- c("blue", "orange", "Total")
- )
- expect_equal(
- class(f_tabyl_totaled$fac),
- "factor"
- )
-})
-
-test_that("if factor level already present, adorn_totals() still works, #529", {
- factor_present <- mtcars %>%
- tabyl(am, cyl)
- factor_present$am <- factor(factor_present$am, levels = c("0", "1", "Total"))
- expect_equal(
- levels(adorn_totals(factor_present, "row")$am),
- c("0", "1", "Total")
- )
-})
diff --git a/tests/testthat/test-get-level-groups.R b/tests/testthat/test-get-level-groups.R
deleted file mode 100644
index b95c30fd..00000000
--- a/tests/testthat/test-get-level-groups.R
+++ /dev/null
@@ -1,24 +0,0 @@
-# Tests the get_level_groups helper function called by top_levels()
-
-shorts <- factor(c("a", "b", "c", "d", "e", "f"), levels = rev(letters[1:6]))
-longs <- factor(c("aaaaaaaaaaaaaaaa", "bbbbbbbbbbbbbbbbb", "cccccccccccccccccccc", "dddddddddddddddd", NA, "hhhhhhhhhhhhhhhh", "bbbbbbbbbbbbbbbbb"), levels = c("dddddddddddddddd", "aaaaaaaaaaaaaaaa", "cccccccccccccccccccc", "bbbbbbbbbbbbbbbbb", "hhhhhhhhhhhhhhhh"))
-
-short1 <- get_level_groups(shorts, 1, max(as.numeric(shorts), na.rm = TRUE))
-short2 <- get_level_groups(shorts, 2, max(as.numeric(shorts), na.rm = TRUE))
-short3 <- get_level_groups(shorts, 3, max(as.numeric(shorts), na.rm = TRUE))
-
-test_that("names are grouped properly and groups are ordered correctly", {
- expect_equal(short1, list(top = "f", mid = "e, d, c, b", bot = "a"))
- expect_equal(short2, list(top = "f, e", mid = c("d, c"), bot = "b, a"))
- expect_equal(short3, list(top = "f, e, d", mid = NA, bot = "c, b, a"))
-})
-
-long1 <- get_level_groups(longs, 1, max(as.numeric(longs), na.rm = TRUE))
-long2 <- get_level_groups(longs, 2, max(as.numeric(longs), na.rm = TRUE))
-
-test_that("truncation works correctly", {
- expect_equal(long1, list(top = "dddddddddddddddd", mid = "<<< Middle Group (3 categories) >>>", bot = "hhhhhhhhhhhhhhhh"))
- expect_equal(long2, list(top = "dddddddddddddddd, aaaaaaaaa...", mid = "cccccccccccccccccccc", bot = "bbbbbbbbbbbbbbbbb, hhhhhhhh..."))
- expect_equal(nchar(long2$top), 30)
- expect_equal(nchar(long2$bot), 30)
-})
diff --git a/tests/testthat/test-statistical-tests.R b/tests/testthat/test-statistical-tests.R
deleted file mode 100644
index 4f0c6ddd..00000000
--- a/tests/testthat/test-statistical-tests.R
+++ /dev/null
@@ -1,126 +0,0 @@
-# Tests for two-way statistical tests
-
-# Duplicate mtcars rows to avoid chis.test warnings
-mtcars3 <- rbind(mtcars, mtcars, mtcars)
-tab <- table(mtcars3$am, mtcars3$cyl)
-ttab <- tabyl(mtcars3, am, cyl)
-ow_tab <- tabyl(mtcars3, am)
-
-test_that("one-way tabyl is rejected by chisq.test and fisher.test", {
- expect_error(chisq.test(ow_tab))
- expect_error(fisher.test(ow_tab))
-})
-
-test_that("janitor::chisq.test on a table is correct", {
- res <- stats::chisq.test(tab)
- jres <- janitor::chisq.test(tab)
- expect_equal(jres, res)
-})
-
-test_that("janitor::chisq.test on a matrix is correct", {
- mat <- matrix(c(151, 434, 345, 221, 145, 167), ncol = 3)
- res <- stats::chisq.test(mat)
- jres <- janitor::chisq.test(mat)
- expect_equal(jres, res)
-})
-
-test_that("janitor::chisq.test on two factors is correct", {
- res <- stats::chisq.test(mtcars3$am, mtcars3$cyl)
- jres <- janitor::chisq.test(mtcars3$am, mtcars3$cyl)
- expect_equal(jres, res)
-})
-
-test_that("janitor::chisq.test with a numeric vector and p is correct", {
- v1 <- round(runif(10, 200, 1000))
- v2 <- round(runif(10, 200, 1000))
- res <- stats::chisq.test(v1, p = v2 / sum(v2))
- jres <- janitor::chisq.test(v1, p = v2 / sum(v2))
- expect_equal(jres, res)
-})
-
-test_that("janitor::fisher.test on a table is correct", {
- res <- stats::fisher.test(tab)
- jres <- janitor::fisher.test(tab)
- expect_equal(jres, res)
-})
-
-test_that("janitor::fisher.test on a matrix is correct", {
- mat <- matrix(c(151, 434, 345, 221, 145, 167), ncol = 3)
- res <- stats::fisher.test(mat)
- jres <- janitor::fisher.test(mat)
- expect_equal(jres, res)
-})
-
-test_that("janitor::fisher.test on two vectors is correct", {
- res <- stats::fisher.test(mtcars3$am, mtcars3$cyl)
- jres <- janitor::fisher.test(mtcars3$am, mtcars3$cyl)
- expect_equal(jres, res)
-})
-
-test_that("janitor::chisq.test on a two-way tabyl is identical to stats::chisq.test", {
- tab <- tabyl(mtcars3, am, cyl)
- tres <- chisq.test(tab, tabyl_results = FALSE)
- tab <- table(mtcars3$am, mtcars3$cyl)
- res <- chisq.test(tab)
- expect_equal(tres, res)
-})
-
-test_that("janitor::fisher.test on a two-way tabyl is identical to stats::fisher.test", {
- tab <- tabyl(mtcars3, am, cyl)
- tres <- fisher.test(tab)
- tab <- table(mtcars3$am, mtcars3$cyl)
- res <- fisher.test(tab)
- expect_equal(tres, res)
-})
-
-test_that("janitor::chisq.test returns tabyl tables", {
- tres <- chisq.test(ttab, tabyl_results = TRUE)
- expect_s3_class(tres$observed, "tabyl")
- expect_s3_class(tres$expected, "tabyl")
- expect_s3_class(tres$residuals, "tabyl")
- expect_s3_class(tres$stdres, "tabyl")
-})
-
-test_that("returned tabyls have correct names and attributes", {
- tres <- chisq.test(ttab, tabyl_results = TRUE)
- expect_named(tres$observed, c("am", "4", "6", "8"))
- expect_named(tres$expected, c("am", "4", "6", "8"))
- expect_named(tres$residuals, c("am", "4", "6", "8"))
- expect_named(tres$stdres, c("am", "4", "6", "8"))
- expect_equal(tres$observed[[1]], c("0", "1"))
- expect_equal(tres$expected[[1]], c("0", "1"))
- expect_equal(tres$residuals[[1]], c("0", "1"))
- expect_equal(tres$stdres[[1]], c("0", "1"))
- expect_equal(attr(tres$observed, "var_names"), list(row = "am", col = "cyl"))
- expect_equal(attr(tres$expected, "var_names"), list(row = "am", col = "cyl"))
- expect_equal(attr(tres$residuals, "var_names"), list(row = "am", col = "cyl"))
- expect_equal(attr(tres$stdres, "var_names"), list(row = "am", col = "cyl"))
-})
-
-test_that("totals are excluded from the statistical tests, #385", {
- # Chi-Square
- cx <- chisq.test(ttab)
- cx_totals <- suppressWarnings(chisq.test(adorn_totals(ttab, "both")))
- cx_totals$data.name <- "ttab" # otherwise the test shows a mismatch, as the inputs had different names
- expect_equal(
- cx,
- cx_totals
- )
- expect_warning(
- chisq.test(ttab %>% adorn_totals()),
- "detected a totals row"
- )
-
- # Fisher
- fisher <- fisher.test(ttab)
- fisher_totals <- suppressWarnings(fisher.test(adorn_totals(ttab, "both")))
- fisher_totals$data.name <- "ttab" # otherwise the test shows a mismatch, as the inputs had different names
- expect_equal(
- fisher,
- fisher_totals
- )
- expect_warning(
- fisher.test(ttab %>% adorn_totals()),
- "detected a totals row"
- )
-})
diff --git a/tests/testthat/test-tabyl-classifiers.R b/tests/testthat/test-tabyl-classifiers.R
deleted file mode 100644
index f682d9e4..00000000
--- a/tests/testthat/test-tabyl-classifiers.R
+++ /dev/null
@@ -1,119 +0,0 @@
-# Tests tabyl class functions
-
-a <- mtcars %>%
- tabyl(cyl, carb)
-
-b <- mtcars %>%
- dplyr::count(cyl, carb) %>%
- tidyr::pivot_wider(
- names_from = carb,
- values_from = n,
- values_fill = 0,
- names_sort = TRUE
- ) %>%
- as.data.frame() # for comparison purposes, remove the tbl_df aspect
-
-
-test_that("as_tabyl works on result of a non-janitor count/pivot_wider", {
- expect_equal(
- as_tabyl(a),
- as_tabyl(b, 2, "cyl", "carb")
- )
-})
-
-test_that("as_tabyl sets attributes correctly", {
- d <- as_tabyl(a)
- expect_equal(class(d), class(a))
- expect_equal(attr(d, "core"), untabyl(a))
- expect_equal(attr(d, "tabyl_type"), "two_way")
-})
-
-test_that("untabyl puts back to original form", {
- expect_equal(mtcars, untabyl(as_tabyl(mtcars)))
-})
-
-test_that("untabyl warns if called on non-tabyl", {
- expect_warning(
- untabyl(mtcars),
- "untabyl\\(\\) called on a non-tabyl"
- )
-})
-
-test_that("untabyl automatically invokes purrr::map when called on a 3-way tabyl", {
- three <- tabyl(mtcars, cyl, am, gear)
- expect_equal(
- untabyl(three), # vanilla call
- purrr::map(three, untabyl)
- )
-})
-
-test_that("as_tabyl is okay with non-numeric columns", {
- e <- b %>%
- dplyr::mutate(extra = "val")
- expect_equal(attr(as_tabyl(e), "core"), e) # implied success of as_tabyl
-})
-
-test_that("as_tabyl fails if no numeric columns in 2:n", {
- bad <- data.frame(
- a = 1:2,
- b = c("x", "y")
- )
- expect_error(as_tabyl(bad), "at least one one of columns 2:n must be of class numeric")
-})
-
-test_that("bad inputs are caught", {
- expect_error(as_tabyl(mtcars, 3),
- "axes must be either 1 or 2",
- fixed = TRUE
- )
-
- expect_error(as_tabyl(1:10),
- "input must be a data.frame",
- fixed = TRUE
- )
-
- # don't pass names to a 1-way tabyl
- expect_error(
- as_tabyl(mtcars, axes = 1, row_var_name = "foo"),
- "variable names are only meaningful for two-way tabyls"
- )
-})
-
-test_that("adorn_totals and adorn_percentages reset the tabyl's core to reflect sorting, #407", {
- unsorted <- mtcars %>% tabyl(am, cyl)
- sorted <- dplyr::arrange(unsorted, desc(`4`))
- expect_equal(
- sorted %>%
- adorn_totals() %>%
- attr(., "core"),
- sorted %>%
- untabyl()
- )
- expect_equal(
- sorted %>%
- adorn_percentages() %>%
- attr(., "core"),
- sorted %>%
- untabyl()
- )
- # both:
- expect_equal(
- sorted %>%
- adorn_totals() %>%
- adorn_percentages() %>%
- attr(., "core"),
- sorted %>%
- untabyl()
- )
- # Ns with "Total" row sorted to top - the Total N should be up there too:
- expect_equal(
- sorted %>%
- adorn_totals() %>%
- adorn_percentages("col") %>%
- dplyr::arrange(desc(`4`)) %>%
- adorn_ns() %>%
- dplyr::pull(`4`) %>%
- dplyr::first(),
- "1.0000000 (11)"
- )
-})
diff --git a/tests/testthat/test-tabyl.R b/tests/testthat/test-tabyl.R
deleted file mode 100644
index 4226e987..00000000
--- a/tests/testthat/test-tabyl.R
+++ /dev/null
@@ -1,507 +0,0 @@
-# Tests for data.frame renaming function
-
-cyl_tbl <- tabyl(mtcars$cyl)
-
-test_that("counts are accurate", {
- expect_equal(cyl_tbl$`mtcars$cyl`, c(4, 6, 8))
- expect_equal(cyl_tbl$n, c(11, 7, 14))
-})
-
-test_that("percentages are accurate", {
- expect_equal(cyl_tbl$percent, c(11 / 32, 7 / 32, 14 / 32))
-})
-
-# Character input, with and without NA
-test_df <- data.frame(grp = c("a", "b", "b", "c"), stringsAsFactors = FALSE)
-test_df_na <- data.frame(grp = c("a", "b", "b", "c", NA), stringsAsFactors = FALSE)
-test_res <- tabyl(test_df$grp)
-test_res_na <- tabyl(test_df_na$grp)
-
-test_that("names are right", {
- expect_equal(names(cyl_tbl), c("mtcars$cyl", "n", "percent"))
- expect_equal(names(test_res_na), c("test_df_na$grp", "n", "percent", "valid_percent"))
-})
-
-test_that("named vectors are handled properly", { # issue 144
- x <- c(a = "x", b = "y", c = "z")
- expect_equal(names(tabyl(x))[1], "x")
-})
-
-test_that("NAs handled correctly", {
- expect_equal(test_res_na$percent, c(0.2, 0.4, 0.2, 0.2))
- expect_equal(test_res_na$valid_percent, c(0.25, 0.5, 0.25, NA))
-})
-
-test_that("show_NA = FALSE parameter works, incl. with piped input", {
- resss <- test_res
- names(resss)[1] <- "test_df_na$grp"
- names(attr(resss, "core"))[1] <- "test_df_na$grp"
- expect_equal(
- resss,
- tabyl(test_df_na$grp, show_na = FALSE)
- )
- names(attr(resss, "core"))[1] <- "grp"
- names(resss)[1] <- "grp" # for this next instance, col name changes
- expect_equal(
- resss,
- test_df_na %>% tabyl(grp, show_na = FALSE)
- )
-})
-
-test_that("ordering of result by factor levels is preserved for factors", {
- expect_equal(tabyl(factor(c("x", "y", "z"), levels = c("y", "z", "x")))[[1]], factor(c("y", "z", "x"), levels = c("y", "z", "x")))
-})
-
-# missing factor levels shown, with and without NA
-fac <- iris[["Species"]][70:80] # to get versicolor, not the first alphabetically
-fac_na <- fac
-fac_na[1:2] <- NA
-
-
-test_that("missing factor levels are displayed without NA values", {
- expect_equal(tabyl(fac)[[1]], factor(c("setosa", "versicolor", "virginica"), levels = c("setosa", "versicolor", "virginica")))
- expect_equal(tabyl(fac)[[2]], c(0, 11, 0))
- expect_equal(tabyl(fac)[[3]], c(0, 1, 0))
-})
-
-test_that("missing factor levels are displayed with NA values", {
- expect_equal(tabyl(fac_na)[[1]], factor(c("setosa", "versicolor", "virginica", NA), levels = c("setosa", "versicolor", "virginica")))
- expect_equal(tabyl(fac_na)[[2]], c(0, 9, 0, 2))
- expect_equal(tabyl(fac_na)[[3]], c(0, 9 / 11, 0, 2 / 11))
- expect_equal(tabyl(fac_na)[[4]], c(0, 1, 0, NA))
-})
-
-# piping
-test_that("piping in a data.frame works", {
- x <- tabyl(mtcars$cyl)
- names(x)[1] <- "cyl"
- names(attr(x, "core"))[1] <- "cyl"
- expect_equal(
- x,
- mtcars %>% tabyl(cyl)
- )
-})
-
-
-test_that("column1 stays its original data type per #168, in both resulting tabyl and core", {
- # test character, logical, numeric, factor X both values for show_missing_levels; confirm class in core and in main result
- # do those 8 tests in a loop?
- loop_df <- data.frame(
- a = c(TRUE, FALSE, TRUE),
- b = c("x", "y", "y"),
- c = c(1, 1, 2), stringsAsFactors = FALSE
- )
- for (i in c("logical", "numeric", "character")) {
- for (j in c(TRUE, FALSE)) {
- loop_df_temp <- loop_df
- class(loop_df_temp$a) <- i
- loop_tab <- loop_df_temp %>% tabyl(a, b, c, show_missing_levels = j)
- expect_equal(class(loop_tab[[1]]$a), class(loop_df_temp$a))
- expect_equal(class(attr(loop_tab[[1]], "core")$a), class(loop_df_temp$a)) # check core class
- }
- }
- loop_df$a <- factor(c("hi", "lo", "hi"))
- for (j in c(TRUE, FALSE)) {
- loop_df_temp <- loop_df
- loop_tab <- loop_df_temp %>% tabyl(a, b, c, show_missing_levels = j)
- expect_equal(class(loop_tab[[1]]$a), class(loop_df_temp$a))
- expect_equal(levels(loop_tab[[1]]$a), levels(loop_df_temp$a))
- expect_equal(class(attr(loop_tab[[1]], "core")$a), class(loop_df_temp$a)) # check core class and levels
- expect_equal(levels(attr(loop_tab[[1]], "core")$a), levels(loop_df_temp$a))
- }
-})
-
-# bad inputs
-
-test_that("failure occurs when passed unsupported types", {
- expect_error(tabyl(matrix(1:10, nrow = 5)), "input must be a vector of type logical, numeric, character, list, or factor")
- expect_error(tabyl(complex(10)), "input must be a vector of type logical, numeric, character, list, or factor")
-})
-
-test_that("bad input variable name is preserved", {
- expect_equal(
- mtcars %>% dplyr::mutate(`bad name` = cyl) %>% tabyl(`bad name`) %>% names() %>% .[[1]],
- "bad name"
- )
- k <- mtcars %>% dplyr::mutate(`bad name` = cyl)
- expect_equal(
- tabyl(k$`bad name`) %>% names() %>% .[[1]],
- "k$`bad name`"
- )
-})
-
-
-test_that("input variable names 'percent' and 'n' are handled", {
- a <- mtcars %>% tabyl(mpg)
- expect_equal(
- a %>% tabyl(percent),
- as_tabyl(
- data.frame(
- percent = c(1 / 32, 2 / 32),
- n = c(18, 7),
- percent_percent = c(18 / 25, 7 / 25)
- ),
- 1
- )
- )
- expect_equal(
- a %>% tabyl(n),
- as_tabyl(
- data.frame(
- n = 1:2,
- n_n = c(18, 7),
- percent = c(18 / 25, 7 / 25)
- ),
- 1
- )
- )
-})
-
-test_that("bizarre combination of %>%, quotes, and spaces in names is handled", {
- dat <- data.frame(
- `The candidate(s) applied directly to my school` = c("a", "b", "a", "b"),
- check.names = FALSE,
- stringsAsFactors = FALSE
- )
-
- expect_equal(
- tabyl(dat$`The candidate(s) applied directly to my school` %>% gsub("hi", "there", .)) %>%
- names() %>%
- .[1],
- "dat$`The candidate(s) applied directly to my school` %>% gsub(\"hi\", \"there\", .)"
- )
-})
-
-test_that("grouped data.frame inputs are handled (#125)", {
- expect_equal(
- mtcars %>% dplyr::group_by(cyl) %>% tabyl(carb, gear),
- mtcars %>% tabyl(carb, gear)
- )
-})
-
-
-test_that("if called on non-existent vector, returns useful error message", {
- expect_error(tabyl(mtcars$moose), "object mtcars\\$moose not found")
- expect_error(tabyl(moose), "object 'moose' not found")
- expect_error(mtcars %>% tabyl(moose))
-})
-
-test_that("if called on data.frame with no or irregular columns specified, returns informative error message", {
- expect_error(tabyl(mtcars), "if calling on a data.frame, specify unquoted column names(s) to tabulate. Did you mean to call tabyl() on a vector?",
- fixed = TRUE
- )
- expect_error(tabyl(mtcars, var2 = am),
- "please specify var1 OR var1 & var2 OR var1 & var2 & var3",
- fixed = TRUE
- )
-})
-
-test_that("fails if called on a non-data.frame list", { # it's not meant to do this and result will likely be garbage, so fail
- L <- list(a = 1, b = "rstats")
- expect_error(tabyl(L),
- "tabyl() is meant to be called on vectors and data.frames; convert non-data.frame lists to one of these types",
- fixed = TRUE
- )
-})
-
-# showing missing factor levels
-
-test_that("show_missing_levels parameter works", {
- z <- structure(
- list(
- a = structure(1, .Label = c("hi", "lo"), class = "factor"),
- b = structure(2, .Label = c("big", "small"), class = "factor"),
- new = structure(1, .Label = c("lvl1", "lvl2"), class = "factor")
- ),
- row.names = c(NA, -1L), class = c("tbl_df", "tbl", "data.frame"),
- .Names = c("a", "b", "new")
- )
-
- expect_equal(
- z %>% tabyl(a, b, new, show_missing_levels = TRUE),
- list(lvl1 = data.frame(
- a = c("hi", "lo"),
- big = c(0, 0),
- small = c(1, 0),
- stringsAsFactors = TRUE
- ) %>% as_tabyl(2, "a", "b"))
- )
- expect_equal(
- z %>% tabyl(a, b, new, show_missing_levels = FALSE) %>% .[[1]],
- data.frame(
- a = factor("hi", levels = c("hi", "lo")),
- small = c(1)
- ) %>% as_tabyl(2, "a", "b")
- )
-
- # Works with numerics
- expect_equal(
- mtcars %>% tabyl(cyl, am),
- data.frame(
- cyl = c(4, 6, 8),
- `0` = c(3, 4, 12),
- `1` = c(8, 3, 2),
- check.names = FALSE
- ) %>% as_tabyl(2, "cyl", "am")
- )
-})
-
-# NA handling - position and removal
-# Putting this outside the following test block for later re-use
-x <- data.frame(
- a = c(1, 2, 2, 2, 1, 1, 1, NA, NA, 1),
- b = c(rep("up", 4), rep("down", 4), NA, NA),
- c = 10,
- d = c(NA, 10:2),
- stringsAsFactors = FALSE
-)
-
-test_that("NA levels get moved to the last column in the data.frame, are suppressed properly", {
- y <- tabyl(x, a, b) %>%
- untabyl()
- expect_equal(
- y,
- data.frame(
- a = c(1, 2, NA),
- down = c(3, 0, 1),
- up = c(1, 3, 0),
- NA_ = c(1, 0, 1)
- )
- )
-
- expect_equal(
- tabyl(x, a, b, show_na = FALSE) %>%
- untabyl(),
- data.frame(
- a = c(1, 2),
- down = c(3, 0),
- up = c(1, 3)
- )
- )
-
- # one-way suppression
- expect_equal(
- tabyl(x$a, show_na = FALSE) %>%
- untabyl(),
- data.frame(
- `x$a` = 1:2,
- n = c(5, 3),
- percent = c(0.625, 0.375),
- check.names = FALSE
- )
- )
-
- # NA level is shown in 3 way split
- y <- x %>% tabyl(c, a, b, show_missing_levels = FALSE)
- expect_equal(length(y), 3)
- expect_equal(names(y), c("down", "up", "NA_"))
- expect_equal(
- y[["NA_"]], # column c remains numeric
- x %>%
- dplyr::filter(is.na(b)) %>%
- tabyl(c, a)
- )
-
- y_with_missing <- x %>% tabyl(c, a, b, show_missing_levels = TRUE)
- expect_equal(length(y_with_missing), 3)
- expect_equal(names(y_with_missing), c("down", "up", "NA_"))
- expect_equal(
- y_with_missing[["NA_"]] %>% untabyl(), # column c remains numeric
- data.frame(c = 10, `1` = 1, `2` = 0, NA_ = 1, check.names = FALSE)
- )
- # If no NA in 3rd variable, it doesn't appear in split list
- expect_equal(length(dplyr::starwars %>%
- dplyr::filter(species == "Human") %>%
- tabyl(eye_color, skin_color, gender, show_missing_levels = TRUE)), 2)
-
- # If there is NA, it does appear in split list
- expect_equal(length(dplyr::starwars %>%
- tabyl(eye_color, skin_color, gender, show_missing_levels = TRUE)), 3)
- expect_equal(length(dplyr::starwars %>%
- tabyl(eye_color, skin_color, gender, show_missing_levels = FALSE)), 3)
-
- # NA level in the list gets suppressed if show_na = FALSE. Should have one less level if NA is suppressed.
- expect_equal(length(dplyr::starwars %>%
- tabyl(eye_color, skin_color, gender, show_na = TRUE)), 3)
- expect_equal(length(dplyr::starwars %>%
- tabyl(eye_color, skin_color, gender, show_na = FALSE)), 2)
-})
-
-test_that("tabyl fill 0s with show_missing_levels = FALSE", {
- res <- x %>% tabyl(a, b, show_missing_levels = FALSE)
- got <- data.frame(a = c(1, 2, NA), down = c(3L, 0L, 1L), up = c(1L, 3L, 0L), NA_ = c(1L, 0L, 1L)) %>%
- structure(
- class = c("tabyl", "data.frame"),
- core = data.frame(a = c(1, 2, NA), down = c(3L, 0L, 1L), up = c(1L, 3L, 0L), NA_ = c(1L, 0L, 1L)),
- tabyl_type = "two_way",
- var_names = list(row = "a", col = "b")
- )
- expect_equal(res, got)
-})
-
-test_that("zero-row and fully-NA inputs are handled", {
- zero_vec <- character(0)
- expect_equal(nrow(tabyl(zero_vec)), 0)
- expect_equal(names(tabyl(zero_vec)), c("zero_vec", "n", "percent"))
-
- zero_df <- data.frame(a = character(0), b = character(0))
- expect_message(
- expect_equal(nrow(tabyl(zero_df, a, b)), 0)
- )
- expect_message(
- expect_equal(names(tabyl(zero_df, a, b)), "a"),
- "No records to count so returning a zero-row tabyl"
- )
-
- all_na_df <- data.frame(a = c(NA, NA), b = c(NA_character_, NA_character_))
- expect_message(
- expect_equal(tabyl(all_na_df, a, b, show_na = FALSE) %>% nrow(), 0)
- )
- expect_message(
- expect_equal(tabyl(all_na_df, a, b, show_na = FALSE) %>% names(), "a"),
- "No records to count so returning a zero-row tabyl"
- )
-})
-
-test_that("print.tabyl prints without row numbers", {
- expect_equal(
- mtcars %>% tabyl(am, cyl) %>% capture.output(),
- c(" am 4 6 8", " 0 3 4 12", " 1 8 3 2")
- )
-})
-
-test_that("the dplyr warning suggesting forcats::fct_explicit_na that is generated by a tabyl of a factor with NA values is caught ", {
- # leaving this in as I'd want to know if it ever gets loud again, but the warning seems to be gone in
- # dplyr 1.0.0 and I have removed the withCallingHandlers({}) code in tabyl() that this was testing
- expect_silent(
- tabyl(factor(c("a", "b", NA)))
- )
- xx <- data.frame(
- a = factor(c("a", "b", NA)),
- b = 1:3
- )
- expect_silent(xx %>%
- tabyl(a, b))
-})
-
-test_that("3-way tabyl with 3rd var factor is listed in right order, #250", {
- z <- mtcars
- z$cyl <- factor(z$cyl, levels = c(4, 8, 6))
- expect_equal(names(tabyl(z, am, gear, cyl)), c("4", "8", "6"))
- z$cyl[32] <- NA
- expect_equal(names(tabyl(z, am, gear, cyl)), c("4", "8", "6", "NA_"))
- expect_equal(names(tabyl(z, am, gear, cyl, show_na = FALSE)), c("4", "8", "6"))
- z <- z %>% dplyr::filter(!cyl %in% "4")
- expect_equal(names(tabyl(z, am, gear, cyl)), c("8", "6", "NA_"))
-})
-
-test_that("tabyl works with label attributes (#394)", {
- mt_label <- mtcars
- attr(mt_label$cyl, "label") <- "Number of cyl"
- tab <- tabyl(mt_label, cyl)
- expect_named(tab, c("Number of cyl", "n", "percent"))
- tab2 <- tabyl(mt_label, cyl, am)
- expect_named(tab2, c("Number of cyl", "0", "1"))
- tab3 <- tabyl(mt_label, cyl, am, vs)
- expect_equal(names(tab3[[1]])[1], "Number of cyl")
-})
-
-test_that("tabyl works with ordered 1st variable, #386", {
- mt_ordered <- mtcars
- mt_ordered$cyl <- ordered(mt_ordered$cyl, levels = c("4", "8", "6"))
-
- ordered_3way <- mt_ordered %>%
- tabyl(cyl, gear, am)
- expect_equal(class(ordered_3way[[1]]$cyl), c("ordered", "factor")) # 1st col in resulting tabyl
- expect_equal(class(attr(ordered_3way[[1]], "core")$cyl), c("ordered", "factor")) # 1st col in tabyl core
-})
-
-test_that("factor ordering of columns is correct in 2-way tabyl", {
- two_factors <- data.frame(
- x = factor(c("big", "small", "medium", "small"),
- levels = c("small", "medium", "big")
- ),
- y = factor(c("hi", "hi", "hi", "lo"),
- levels = c("lo", "hi")
- )
- )
- expect_equal(
- two_factors %>%
- tabyl(x, y) %>%
- names(),
- c("x", "lo", "hi")
- )
-})
-
-test_that("empty strings converted to _emptystring", {
- mt_empty <- mtcars
- mt_empty$cyl[1:2] <- c("", NA_character_)
- expect_equal(
- mt_empty %>%
- tabyl(am, cyl) %>%
- names(),
- c("am", "4", "6", "8", "emptystring_", "NA_")
- )
-})
-
-test_that("3way tabyls with factors in cols 1-2 are arranged correctly, #379", {
- dat_3wayfactors <- data.frame(
- gender = c("f", "m", "m", "f", "m"),
- age_group = factor(
- c("18-35", "46-55", "46-55", "36-45", ">55"),
- levels = c("18-35", "36-45", "46-55", ">55")
- ),
- bmi_group = factor(
- c("18.5 - 25", "25 - 30", "18.5 - 25", ">30", "<18.5"),
- levels = c("<18.5", "18.5 - 25", "25 - 30", ">30")
- ),
- stringsAsFactors = TRUE
- )
-
- tabyl_3wf <- dat_3wayfactors %>%
- tabyl(bmi_group, age_group, gender, show_missing_levels = FALSE)
-
- expect_equal(names(tabyl_3wf$m), c("bmi_group", "46-55", ">55"))
- expect_equal(
- tabyl_3wf$f[[1]],
- factor(
- c("18.5 - 25", ">30"),
- levels = c("<18.5", "18.5 - 25", "25 - 30", ">30")
- )
- )
-})
-
-test_that("tabyl errors informatively called like tabyl(mtcars$cyl, mtcars$gear), #377", {
- expect_error(
- tabyl(mtcars$cyl, mtcars$am),
- regexp = "Did you try to call tabyl on two vectors"
- )
- has_logicals <- data.frame(x = 1:2, y = c(TRUE, FALSE))
- expect_error(
- tabyl(has_logicals$x, has_logicals$y),
- regexp = "Did you try to call tabyl on two vectors"
- )
- expect_type(
- has_logicals %>%
- tabyl(x, y),
- "list"
- )
-})
-
-test_that("2-way tabyl with numeric column names is sorted numerically", {
- df <- data.frame(var1 = c(1:11), var2 = c(NA, 10:1))
- expect_equal(colnames(df %>% tabyl(var1, var2)), c("var1", 1:10, "NA_"))
-})
-
-test_that("3-way tabyl with numeric names is sorted numerically", {
- expect_equal(
- names(mtcars %>% tabyl(gear, cyl, hp)),
- as.character(sort(unique(mtcars$hp)))
- )
-
- # Check putting NA last - data.frame "x" is created way above
- expect_equal(
- names(x %>% tabyl(a, c, d)),
- c(2:10, "NA_")
- )
-})
diff --git a/tests/testthat/test-top-levels.R b/tests/testthat/test-top-levels.R
deleted file mode 100644
index f90b0c19..00000000
--- a/tests/testthat/test-top-levels.R
+++ /dev/null
@@ -1,63 +0,0 @@
-fac <- factor(c("a", "b", "c", "d", "e", "f", "f"), levels = rev(letters[1:6]))
-fac_odd_lvls <- factor(fac, levels = rev(letters[1:5]))
-
-# more tests - group names and ordering - are in test-get-level-groups.R
-test_that("top_levels values are correct", {
- expect_equal(top_levels(fac)[[3]], c(3 / 7, 2 / 7, 2 / 7)) # default n = 2, num_levels = 6
- expect_equal(top_levels(fac)[[2]], c(3, 2, 2))
- expect_equal(top_levels(fac, 3)[[3]], c(4 / 7, 3 / 7)) # n = 3, num_levels = 6
- expect_equal(top_levels(fac, 3)[[2]], c(4, 3))
- expect_equal(top_levels(fac_odd_lvls)[[2]], c(2, 1, 2)) # default n = 2, num_levels = 5
- expect_equal(top_levels(fac_odd_lvls)[[3]], c(0.4, 0.2, 0.4))
- expect_equal(top_levels(fac_odd_lvls, 1)[[2]], c(1, 3, 1)) # n = 1, num_levels = 5
- expect_equal(top_levels(fac_odd_lvls, 1)[[3]], c(0.2, 0.6, 0.2))
-})
-
-test_that("top_levels missing levels are represented", {
- x <- as.factor(letters[1:5])[1:3]
- expect_equal(
- top_levels(x)[[1]],
- structure(1:3, .Label = c("a, b", "c", "d, e"), class = "factor")
- )
- expect_equal(
- top_levels(x)[[2]],
- c(2, 1, 0)
- )
-})
-
-
-test_that("top_levels NA results are treated appropriately", {
- fac_na <- fac
- fac_na[7] <- NA
- expect_equal(top_levels(fac_na)[[2]], rep(2, 3))
- expect_equal(top_levels(fac_na, show_na = TRUE)[[2]], c(2, 2, 2, 1))
- expect_equal(top_levels(fac_na, show_na = TRUE)[[3]], c(2 / 7, 2 / 7, 2 / 7, 1 / 7))
- expect_equal(top_levels(fac_na, show_na = TRUE)[[4]], c(1 / 3, 1 / 3, 1 / 3, NA))
-})
-
-test_that("top_levels default n parameter works", {
- expect_equal(top_levels(fac), top_levels(fac, 2))
-})
-
-test_that("top_levels missing levels are treated appropriately", {
- fac_missing_lvl <- fac
- fac_missing_lvl[2] <- NA
- expect_equal(top_levels(fac_missing_lvl)[[2]], c(3, 2, 1))
-})
-
-test_that("top_levels bad type inputs are handled", {
- expect_error(top_levels(c(0, 1), "factor_vec is not of type 'factor'"))
- expect_error(top_levels(c("hi", "lo"), "factor_vec is not of type 'factor'"))
- expect_error(top_levels(mtcars, "factor_vec is not of type 'factor'"))
-})
-
-test_that("top_levels bad n value is handled", {
- expect_error(top_levels(fac, 4))
- expect_error(top_levels(fac_odd_lvls, 3))
- expect_error(top_levels(fac, 0))
- expect_error(top_levels(factor(c("a", "b"))), "input factor variable must have at least 3 levels")
-})
-
-test_that("top_levels correct variable name assigned to first column of result", {
- expect_equal(names(top_levels(fac))[1], "fac")
-})
diff --git a/vignettes/tabyls.Rmd b/vignettes/tabyls.Rmd
deleted file mode 100644
index 31908572..00000000
--- a/vignettes/tabyls.Rmd
+++ /dev/null
@@ -1,271 +0,0 @@
----
-title: "tabyls: a tidy, fully-featured approach to counting things"
-date: '`r Sys.Date()`'
-output:
- rmarkdown::github_document
-vignette: >
- %\VignetteIndexEntry{tabyls}
- %\VignetteEngine{knitr::rmarkdown}
- %\VignetteEncoding{UTF-8}
----
-
-```{r chunk_options, include = FALSE}
-knitr::opts_chunk$set(collapse = TRUE, comment = "#>")
-```
-
-## Motivation: why tabyl?
-
-Analysts do a lot of counting. Indeed, it's been said that "data science is mostly counting things." But the base R function for counting, `table()`, leaves much to be desired:
-
-- It doesn't accept data.frame inputs (and thus doesn't play nicely with the `%>%` pipe)
-- It doesn't output data.frames
-- Its results are hard to format. Compare the look and formatting choices of an R table to a Microsoft Excel PivotTable or even the table formatting provided by SPSS.
-
-`tabyl()` is an approach to tabulating variables that addresses these shortcomings. It's part of the janitor package because counting is such a fundamental part of data cleaning and exploration.
-
-`tabyl()` is tidyverse-aligned and is primarily built upon the dplyr and tidyr packages.
-
-## How it works
-
-On its surface, `tabyl()` produces frequency tables using 1, 2, or 3 variables. Under the hood, `tabyl()` also attaches a copy of these counts as an attribute of the resulting data.frame.
-
-The result looks like a basic data.frame of counts, but because it's also a `tabyl` containing this metadata, you can use `adorn_` functions to add additional information and pretty formatting.
-
-The `adorn_` functions are built to work on `tabyls`, but have been adapted to work with similar, non-tabyl data.frames that need formatting.
-
-# Examples
-This vignette demonstrates `tabyl` in the context of studying humans in the `starwars` dataset from dplyr:
-```{r clean_starwars, warning = FALSE, message = FALSE}
-library(dplyr)
-humans <- starwars %>%
- filter(species == "Human")
-```
-
-
-## One-way tabyl
-
-Tabulating a single variable is the simplest kind of tabyl:
-
-```{r one_way, message=FALSE}
-library(janitor)
-
-t1 <- humans %>%
- tabyl(eye_color)
-
-t1
-```
-
-
-When `NA` values are present, `tabyl()` also displays "valid" percentages, i.e., with missing values removed from the denominator. And while `tabyl()` is built to take a data.frame and column names, you can also produce a one-way tabyl by calling it directly on a vector:
-
-```{r one_way_vector}
-x <- c("big", "big", "small", "small", "small", NA)
-tabyl(x)
-```
-
-
-Most `adorn_` helper functions are built for 2-way tabyls, but those that make sense for a 1-way tabyl do work:
-```{r one_way_adorns}
-t1 %>%
- adorn_totals("row") %>%
- adorn_pct_formatting()
-```
-
-
-## Two-way tabyl
-
-This is often called a "crosstab" or "contingency" table. Calling `tabyl` on two columns of a data.frame produces the same result as the common combination of `dplyr::count()`, followed by `tidyr::pivot_wider()` to wide form:
-
-```{r two_way}
-t2 <- humans %>%
- tabyl(gender, eye_color)
-
-t2
-```
-
-Since it's a `tabyl`, we can enhance it with `adorn_` helper functions. For instance:
-
-```{r two_way_adorns}
-t2 %>%
- adorn_percentages("row") %>%
- adorn_pct_formatting(digits = 2) %>%
- adorn_ns()
-```
-
-Adornments have options to control axes, rounding, and other relevant formatting choices (more on that below).
-
-## Three-way tabyl
-
-Just as `table()` accepts three variables, so does `tabyl()`, producing a list of tabyls:
-
-```{r three_Way}
-t3 <- humans %>%
- tabyl(eye_color, skin_color, gender)
-
-# the result is a tabyl of eye color x skin color, split into a list by gender
-t3
-```
-
-If the `adorn_` helper functions are called on a list of data.frames - like the output of a three-way `tabyl` call - they will call `purrr::map()` to apply themselves to each data.frame in the list:
-
-```{r three_way_adorns, warning = FALSE, message = FALSE}
-library(purrr)
-humans %>%
- tabyl(eye_color, skin_color, gender, show_missing_levels = FALSE) %>%
- adorn_totals("row") %>%
- adorn_percentages("all") %>%
- adorn_pct_formatting(digits = 1) %>%
- adorn_ns() %>%
- adorn_title()
-```
-
-This automatic mapping supports interactive data analysis that switches between combinations of 2 and 3 variables. That way, if a user starts with `humans %>% tabyl(eye_color, skin_color)`, adds some `adorn_` calls, then decides to split the tabulation by gender and modifies their first line to `humans %>% tabyl(eye_color, skin_color, gender`), they don't have to rewrite the subsequent adornment calls to use `map()`.
-
-However, if feels more natural to call these with `map()` or `lapply()`, that is still supported. For instance, `t3 %>% lapply(adorn_percentages)` would produce the same result as `t3 %>% adorn_percentages`.
-
-### Other features of tabyls
-
-+ When called on a factor, `tabyl` will show missing levels (levels not present in the data) in the result
- + This can be suppressed if not desired
-+ `NA` values can be displayed or suppressed
-+ `tabyls` print without displaying row numbers
-
-You can call `chisq.test()` and `fisher.test()` on a two-way tabyl to perform those statistical tests, just like on a base R `table()` object.
-
-## The `adorn_*` functions
-
-These modular functions build on a `tabyl` to approximate the functionality of a PivotTable in Microsoft Excel. They print elegant results for interactive analysis or for sharing in a report, e.g., with `knitr::kable()`. For example:
-
-```{r}
-humans %>%
- tabyl(gender, eye_color) %>%
- adorn_totals(c("row", "col")) %>%
- adorn_percentages("row") %>%
- adorn_pct_formatting(rounding = "half up", digits = 0) %>%
- adorn_ns() %>%
- adorn_title("combined") %>%
- knitr::kable()
-```
-
-### The adorn functions are:
-
-+ **`adorn_totals()`**: Add totals row, column, or both.
-+ **`adorn_percentages()`**: Calculate percentages along either axis or over the entire tabyl
-+ **`adorn_pct_formatting()`**: Format percentage columns, controlling the number of digits to display and whether to append the `%` symbol
-+ **`adorn_rounding()`**: Round a data.frame of numbers (usually the result of `adorn_percentages`), either using the base R `round()` function or using janitor's `round_half_up()` to round all ties up ([thanks, StackOverflow](https://stackoverflow.com/a/12688836/4470365)).
- + e.g., round 10.5 up to 11, consistent with Excel's tie-breaking behavior.
- + This contrasts with rounding 10.5 down to 10 as in base R's `round(10.5)`.
- + `adorn_rounding()` returns columns of class `numeric`, allowing for graphing, sorting, etc. It's a less-aggressive substitute for `adorn_pct_formatting()`; these two functions should not be called together.
-+ **`adorn_ns()`**: add Ns to a tabyl. These can be drawn from the tabyl's underlying counts, which are attached to the tabyl as metadata, or they can be supplied by the user.
-+ **`adorn_title()`**: add a title to a tabyl (or other data.frame). Options include putting the column title in a new row on top of the data.frame or combining the row and column titles in the data.frame's first name slot.
-
-
-These adornments should be called in a logical order, e.g., you probably want to add totals before percentages are calculated. In general, call them in the order they appear above.
-
-## BYOt (Bring Your Own tabyl)
-
-You can also call `adorn_` functions on other data.frames, not only the results of calls to `tabyl()`. E.g., `mtcars %>% adorn_totals("col") %>% adorn_percentages("col")` performs as expected, despite `mtcars` not being a `tabyl`.
-
-This can be handy when you have a data.frame that is not a simple tabulation generated by `tabyl` but would still benefit from the `adorn_` formatting functions.
-
-A simple example: calculate the proportion of records meeting a certain condition, then format the results.
-
-```{r first_non_tabyl}
-percent_above_165_cm <- humans %>%
- group_by(gender) %>%
- summarise(pct_above_165_cm = mean(height > 165, na.rm = TRUE), .groups = "drop")
-
-percent_above_165_cm %>%
- adorn_pct_formatting()
-```
-
-You can control which columns are adorned by using the `...` argument. It accepts the [tidyselect helpers](https://r4ds.had.co.nz/transform.html#select). That is, you can specify columns the same way you would using `dplyr::select()`.
-
-For instance, say you have a numeric column that should not be included in percentage formatting and you wish to exempt it. Here, only the `proportion` column is adorned:
-
-```{r tidyselect, warning = FALSE, message = FALSE}
-mtcars %>%
- count(cyl, gear) %>%
- rename(proportion = n) %>%
- adorn_percentages("col", na.rm = TRUE, proportion) %>%
- adorn_pct_formatting(, , , proportion) # the commas say to use the default values of the other arguments
-```
-
-Here we specify that only two consecutive numeric columns should be totaled (`year` is numeric but should not be included):
-
-```{r dont_total, warning = FALSE, message = FALSE}
-cases <- data.frame(
- region = c("East", "West"),
- year = 2015,
- recovered = c(125, 87),
- died = c(13, 12)
-)
-
-cases %>%
- adorn_totals(c("col", "row"), fill = "-", na.rm = TRUE, name = "Total Cases", recovered:died)
-```
-
-Here's a more complex example that uses a data.frame of means, not counts. We create a table containing the mean of a 3rd variable when grouped by two other variables, then use `adorn_` functions to round the values and append Ns. The first part is pretty straightforward:
-
-```{r more_non_tabyls, warning = FALSE, message = FALSE}
-library(tidyr) # for pivot_wider()
-mpg_by_cyl_and_am <- mtcars %>%
- group_by(cyl, am) %>%
- summarise(mpg = mean(mpg), .groups = "drop") %>%
- pivot_wider(names_from = am, values_from = mpg)
-
-mpg_by_cyl_and_am
-```
-
-Now to `adorn_` it. Since this is not the result of a `tabyl()` call, it doesn't have the underlying Ns stored in the `core` attribute, so we'll have to supply them:
-```{r add_the_Ns}
-mpg_by_cyl_and_am %>%
- adorn_rounding() %>%
- adorn_ns(
- ns = mtcars %>% # calculate the Ns on the fly by calling tabyl on the original data
- tabyl(cyl, am)
- ) %>%
- adorn_title("combined", row_name = "Cylinders", col_name = "Is Automatic")
-```
-
-If needed, Ns can be manipulated in their own data.frame before they are appended. Here a tabyl with values in the thousands has its Ns formatted to include the separating character `,` as typically seen in American numbers, e.g., `3,000`.
-
-First we create the tabyl to adorn:
-
-```{r formatted_Ns_thousands_prep}
-set.seed(1)
-raw_data <- data.frame(
- sex = rep(c("m", "f"), 3000),
- age = round(runif(3000, 1, 102), 0)
-)
-raw_data$agegroup <- cut(raw_data$age, quantile(raw_data$age, c(0, 1 / 3, 2 / 3, 1)))
-
-comparison <- raw_data %>%
- tabyl(agegroup, sex, show_missing_levels = FALSE) %>%
- adorn_totals(c("row", "col")) %>%
- adorn_percentages("col") %>%
- adorn_pct_formatting(digits = 1)
-
-comparison
-```
-
-At this point, the Ns are unformatted:
-```{r adorn_ns_unformatted}
-comparison %>%
- adorn_ns()
-```
-
-Now we format them to insert the thousands commas. A tabyl's raw Ns are stored in its `"core"` attribute. Here we retrieve those with `attr()`, then apply the base R function `format()` to all numeric columns. Lastly, we append these Ns using `adorn_ns()`.
-
-```{r formatted_Ns_thousands}
-formatted_ns <- attr(comparison, "core") %>% # extract the tabyl's underlying Ns
- adorn_totals(c("row", "col")) %>% # to match the data.frame we're appending to
- dplyr::mutate(across(where(is.numeric), ~ format(.x, big.mark = ",")))
-
-comparison %>%
- adorn_ns(position = "rear", ns = formatted_ns)
-```
-
-### Questions? Comments?
-
-File [an issue on GitHub](https://github.com/sfirke/janitor/issues) if you have suggestions related to `tabyl()` and its `adorn_` helpers or encounter problems while using them.
diff --git a/vignettes/tabyls.md b/vignettes/tabyls.md
deleted file mode 100644
index ea526931..00000000
--- a/vignettes/tabyls.md
+++ /dev/null
@@ -1,454 +0,0 @@
-tabyls: a tidy, fully-featured approach to counting things
-================
-2023-02-03
-
-## Motivation: why tabyl?
-
-Analysts do a lot of counting. Indeed, it’s been said that “data science
-is mostly counting things.” But the base R function for counting,
-`table()`, leaves much to be desired:
-
-- It doesn’t accept data.frame inputs (and thus doesn’t play nicely with
- the `%>%` pipe)
-- It doesn’t output data.frames
-- Its results are hard to format. Compare the look and formatting
- choices of an R table to a Microsoft Excel PivotTable or even the
- table formatting provided by SPSS.
-
-`tabyl()` is an approach to tabulating variables that addresses these
-shortcomings. It’s part of the janitor package because counting is such
-a fundamental part of data cleaning and exploration.
-
-`tabyl()` is tidyverse-aligned and is primarily built upon the dplyr and
-tidyr packages.
-
-## How it works
-
-On its surface, `tabyl()` produces frequency tables using 1, 2, or 3
-variables. Under the hood, `tabyl()` also attaches a copy of these
-counts as an attribute of the resulting data.frame.
-
-The result looks like a basic data.frame of counts, but because it’s
-also a `tabyl` containing this metadata, you can use `adorn_` functions
-to add additional information and pretty formatting.
-
-The `adorn_` functions are built to work on `tabyls`, but have been
-adapted to work with similar, non-tabyl data.frames that need
-formatting.
-
-# Examples
-
-This vignette demonstrates `tabyl` in the context of studying humans in
-the `starwars` dataset from dplyr:
-
-``` r
-library(dplyr)
-humans <- starwars %>%
- filter(species == "Human")
-```
-
-## One-way tabyl
-
-Tabulating a single variable is the simplest kind of tabyl:
-
-``` r
-library(janitor)
-
-t1 <- humans %>%
- tabyl(eye_color)
-
-t1
-#> eye_color n percent
-#> blue 12 0.34285714
-#> blue-gray 1 0.02857143
-#> brown 17 0.48571429
-#> dark 1 0.02857143
-#> hazel 2 0.05714286
-#> yellow 2 0.05714286
-```
-
-When `NA` values are present, `tabyl()` also displays “valid”
-percentages, i.e., with missing values removed from the denominator. And
-while `tabyl()` is built to take a data.frame and column names, you can
-also produce a one-way tabyl by calling it directly on a vector:
-
-``` r
-x <- c("big", "big", "small", "small", "small", NA)
-tabyl(x)
-#> x n percent valid_percent
-#> big 2 0.3333333 0.4
-#> small 3 0.5000000 0.6
-#> 1 0.1666667 NA
-```
-
-Most `adorn_` helper functions are built for 2-way tabyls, but those
-that make sense for a 1-way tabyl do work:
-
-``` r
-t1 %>%
- adorn_totals("row") %>%
- adorn_pct_formatting()
-#> eye_color n percent
-#> blue 12 34.3%
-#> blue-gray 1 2.9%
-#> brown 17 48.6%
-#> dark 1 2.9%
-#> hazel 2 5.7%
-#> yellow 2 5.7%
-#> Total 35 100.0%
-```
-
-## Two-way tabyl
-
-This is often called a “crosstab” or “contingency” table. Calling
-`tabyl` on two columns of a data.frame produces the same result as the
-common combination of `dplyr::count()`, followed by
-`tidyr::pivot_wider()` to wide form:
-
-``` r
-t2 <- humans %>%
- tabyl(gender, eye_color)
-
-t2
-#> gender blue blue-gray brown dark hazel yellow
-#> feminine 3 0 5 0 1 0
-#> masculine 9 1 12 1 1 2
-```
-
-Since it’s a `tabyl`, we can enhance it with `adorn_` helper functions.
-For instance:
-
-``` r
-
-t2 %>%
- adorn_percentages("row") %>%
- adorn_pct_formatting(digits = 2) %>%
- adorn_ns()
-#> gender blue blue-gray brown dark hazel yellow
-#> feminine 33.33% (3) 0.00% (0) 55.56% (5) 0.00% (0) 11.11% (1) 0.00% (0)
-#> masculine 34.62% (9) 3.85% (1) 46.15% (12) 3.85% (1) 3.85% (1) 7.69% (2)
-```
-
-Adornments have options to control axes, rounding, and other relevant
-formatting choices (more on that below).
-
-## Three-way tabyl
-
-Just as `table()` accepts three variables, so does `tabyl()`, producing
-a list of tabyls:
-
-``` r
-t3 <- humans %>%
- tabyl(eye_color, skin_color, gender)
-
-# the result is a tabyl of eye color x skin color, split into a list by gender
-t3
-#> $feminine
-#> eye_color dark fair light pale tan white
-#> blue 0 2 1 0 0 0
-#> blue-gray 0 0 0 0 0 0
-#> brown 0 1 4 0 0 0
-#> dark 0 0 0 0 0 0
-#> hazel 0 0 1 0 0 0
-#> yellow 0 0 0 0 0 0
-#>
-#> $masculine
-#> eye_color dark fair light pale tan white
-#> blue 0 7 2 0 0 0
-#> blue-gray 0 1 0 0 0 0
-#> brown 3 4 3 0 2 0
-#> dark 1 0 0 0 0 0
-#> hazel 0 1 0 0 0 0
-#> yellow 0 0 0 1 0 1
-```
-
-If the `adorn_` helper functions are called on a list of data.frames -
-like the output of a three-way `tabyl` call - they will call
-`purrr::map()` to apply themselves to each data.frame in the list:
-
-``` r
-library(purrr)
-humans %>%
- tabyl(eye_color, skin_color, gender, show_missing_levels = FALSE) %>%
- adorn_totals("row") %>%
- adorn_percentages("all") %>%
- adorn_pct_formatting(digits = 1) %>%
- adorn_ns %>%
- adorn_title
-#> $feminine
-#> skin_color
-#> eye_color fair light
-#> blue 22.2% (2) 11.1% (1)
-#> brown 11.1% (1) 44.4% (4)
-#> hazel 0.0% (0) 11.1% (1)
-#> Total 33.3% (3) 66.7% (6)
-#>
-#> $masculine
-#> skin_color
-#> eye_color dark fair light pale tan white
-#> blue 0.0% (0) 26.9% (7) 7.7% (2) 0.0% (0) 0.0% (0) 0.0% (0)
-#> blue-gray 0.0% (0) 3.8% (1) 0.0% (0) 0.0% (0) 0.0% (0) 0.0% (0)
-#> brown 11.5% (3) 15.4% (4) 11.5% (3) 0.0% (0) 7.7% (2) 0.0% (0)
-#> dark 3.8% (1) 0.0% (0) 0.0% (0) 0.0% (0) 0.0% (0) 0.0% (0)
-#> hazel 0.0% (0) 3.8% (1) 0.0% (0) 0.0% (0) 0.0% (0) 0.0% (0)
-#> yellow 0.0% (0) 0.0% (0) 0.0% (0) 3.8% (1) 0.0% (0) 3.8% (1)
-#> Total 15.4% (4) 50.0% (13) 19.2% (5) 3.8% (1) 7.7% (2) 3.8% (1)
-```
-
-This automatic mapping supports interactive data analysis that switches
-between combinations of 2 and 3 variables. That way, if a user starts
-with `humans %>% tabyl(eye_color, skin_color)`, adds some `adorn_`
-calls, then decides to split the tabulation by gender and modifies their
-first line to `humans %>% tabyl(eye_color, skin_color, gender`), they
-don’t have to rewrite the subsequent adornment calls to use `map()`.
-
-However, if feels more natural to call these with `map()` or `lapply()`,
-that is still supported. For instance,
-`t3 %>% lapply(adorn_percentages)` would produce the same result as
-`t3 %>% adorn_percentages`.
-
-### Other features of tabyls
-
-- When called on a factor, `tabyl` will show missing levels (levels not
- present in the data) in the result
- - This can be suppressed if not desired
-- `NA` values can be displayed or suppressed
-- `tabyls` print without displaying row numbers
-
-You can call `chisq.test()` and `fisher.test()` on a two-way tabyl to
-perform those statistical tests, just like on a base R `table()` object.
-
-## The `adorn_*` functions
-
-These modular functions build on a `tabyl` to approximate the
-functionality of a PivotTable in Microsoft Excel. They print elegant
-results for interactive analysis or for sharing in a report, e.g., with
-`knitr::kable()`. For example:
-
-``` r
-humans %>%
- tabyl(gender, eye_color) %>%
- adorn_totals(c("row", "col")) %>%
- adorn_percentages("row") %>%
- adorn_pct_formatting(rounding = "half up", digits = 0) %>%
- adorn_ns() %>%
- adorn_title("combined") %>%
- knitr::kable()
-```
-
-| gender/eye_color | blue | blue-gray | brown | dark | hazel | yellow | Total |
-|:-----------------|:---------|:----------|:---------|:-------|:--------|:-------|:----------|
-| feminine | 33% (3) | 0% (0) | 56% (5) | 0% (0) | 11% (1) | 0% (0) | 100% (9) |
-| masculine | 35% (9) | 4% (1) | 46% (12) | 4% (1) | 4% (1) | 8% (2) | 100% (26) |
-| Total | 34% (12) | 3% (1) | 49% (17) | 3% (1) | 6% (2) | 6% (2) | 100% (35) |
-
-### The adorn functions are:
-
-- **`adorn_totals()`**: Add totals row, column, or both.
-- **`adorn_percentages()`**: Calculate percentages along either axis or
- over the entire tabyl
-- **`adorn_pct_formatting()`**: Format percentage columns, controlling
- the number of digits to display and whether to append the `%` symbol
-- **`adorn_rounding()`**: Round a data.frame of numbers (usually the
- result of `adorn_percentages`), either using the base R `round()`
- function or using janitor’s `round_half_up()` to round all ties up
- ([thanks,
- StackOverflow](https://stackoverflow.com/a/12688836/4470365)).
- - e.g., round 10.5 up to 11, consistent with Excel’s tie-breaking
- behavior.
- - This contrasts with rounding 10.5 down to 10 as in base R’s
- `round(10.5)`.
- - `adorn_rounding()` returns columns of class `numeric`, allowing for
- graphing, sorting, etc. It’s a less-aggressive substitute for
- `adorn_pct_formatting()`; these two functions should not be called
- together.
-- **`adorn_ns()`**: add Ns to a tabyl. These can be drawn from the
- tabyl’s underlying counts, which are attached to the tabyl as
- metadata, or they can be supplied by the user.
-- **`adorn_title()`**: add a title to a tabyl (or other data.frame).
- Options include putting the column title in a new row on top of the
- data.frame or combining the row and column titles in the data.frame’s
- first name slot.
-
-These adornments should be called in a logical order, e.g., you probably
-want to add totals before percentages are calculated. In general, call
-them in the order they appear above.
-
-## BYOt (Bring Your Own tabyl)
-
-You can also call `adorn_` functions on other data.frames, not only the
-results of calls to `tabyl()`. E.g.,
-`mtcars %>% adorn_totals("col") %>% adorn_percentages("col")` performs
-as expected, despite `mtcars` not being a `tabyl`.
-
-This can be handy when you have a data.frame that is not a simple
-tabulation generated by `tabyl` but would still benefit from the
-`adorn_` formatting functions.
-
-A simple example: calculate the proportion of records meeting a certain
-condition, then format the results.
-
-``` r
-percent_above_165_cm <- humans %>%
- group_by(gender) %>%
- summarise(pct_above_165_cm = mean(height > 165, na.rm = TRUE), .groups = "drop")
-
-percent_above_165_cm %>%
- adorn_pct_formatting()
-#> # A tibble: 2 × 2
-#> gender pct_above_165_cm
-#>
-#> 1 feminine 12.5%
-#> 2 masculine 100.0%
-```
-
-You can control which columns are adorned by using the `...` argument.
-It accepts the [tidyselect
-helpers](https://r4ds.had.co.nz/transform.html#select). That is, you can
-specify columns the same way you would using `dplyr::select()`.
-
-For instance, say you have a numeric column that should not be included
-in percentage formatting and you wish to exempt it. Here, only the
-`proportion` column is adorned:
-
-``` r
-mtcars %>%
- count(cyl, gear) %>%
- rename(proportion = n) %>%
- adorn_percentages("col", na.rm = TRUE, proportion) %>%
- adorn_pct_formatting(,,,proportion) # the commas say to use the default values of the other arguments
-#> cyl gear proportion
-#> 4 3 3.1%
-#> 4 4 25.0%
-#> 4 5 6.2%
-#> 6 3 6.2%
-#> 6 4 12.5%
-#> 6 5 3.1%
-#> 8 3 37.5%
-#> 8 5 6.2%
-```
-
-Here we specify that only two consecutive numeric columns should be
-totaled (`year` is numeric but should not be included):
-
-``` r
-cases <- data.frame(
- region = c("East", "West"),
- year = 2015,
- recovered = c(125, 87),
- died = c(13, 12)
-)
-
-cases %>%
- adorn_totals(c("col", "row"), fill = "-", na.rm = TRUE, name = "Total Cases", recovered:died)
-#> region year recovered died Total Cases
-#> East 2015 125 13 138
-#> West 2015 87 12 99
-#> Total Cases - 212 25 237
-```
-
-Here’s a more complex example that uses a data.frame of means, not
-counts. We create a table containing the mean of a 3rd variable when
-grouped by two other variables, then use `adorn_` functions to round the
-values and append Ns. The first part is pretty straightforward:
-
-``` r
-library(tidyr) # for pivot_wider()
-mpg_by_cyl_and_am <- mtcars %>%
- group_by(cyl, am) %>%
- summarise(mpg = mean(mpg), .groups = "drop") %>%
- pivot_wider(names_from = am, values_from = mpg)
-
-mpg_by_cyl_and_am
-#> # A tibble: 3 × 3
-#> cyl `0` `1`
-#>
-#> 1 4 22.9 28.1
-#> 2 6 19.1 20.6
-#> 3 8 15.0 15.4
-```
-
-Now to `adorn_` it. Since this is not the result of a `tabyl()` call, it
-doesn’t have the underlying Ns stored in the `core` attribute, so we’ll
-have to supply them:
-
-``` r
-mpg_by_cyl_and_am %>%
- adorn_rounding() %>%
- adorn_ns(
- ns = mtcars %>% # calculate the Ns on the fly by calling tabyl on the original data
- tabyl(cyl, am)
- ) %>%
- adorn_title("combined", row_name = "Cylinders", col_name = "Is Automatic")
-#> Cylinders/Is Automatic 0 1
-#> 1 4 22.9 (3) 28.1 (8)
-#> 2 6 19.1 (4) 20.6 (3)
-#> 3 8 15.1 (12) 15.4 (2)
-```
-
-If needed, Ns can be manipulated in their own data.frame before they are
-appended. Here a tabyl with values in the thousands has its Ns formatted
-to include the separating character `,` as typically seen in American
-numbers, e.g., `3,000`.
-
-First we create the tabyl to adorn:
-
-``` r
-set.seed(1)
-raw_data <- data.frame(sex = rep(c("m", "f"), 3000),
- age = round(runif(3000, 1, 102), 0))
-raw_data$agegroup = cut(raw_data$age, quantile(raw_data$age, c(0, 1/3, 2/3, 1)))
-
-comparison <- raw_data %>%
- tabyl(agegroup, sex, show_missing_levels = FALSE) %>%
- adorn_totals(c("row", "col")) %>%
- adorn_percentages("col") %>%
- adorn_pct_formatting(digits = 1)
-
-comparison
-#> agegroup f m Total
-#> (1,34] 33.9% 32.3% 33.1%
-#> (34,68] 33.0% 33.7% 33.4%
-#> (68,102] 32.7% 33.3% 33.0%
-#> 0.4% 0.6% 0.5%
-#> Total 100.0% 100.0% 100.0%
-```
-
-At this point, the Ns are unformatted:
-
-``` r
-comparison %>%
- adorn_ns()
-#> agegroup f m Total
-#> (1,34] 33.9% (1,018) 32.3% (970) 33.1% (1,988)
-#> (34,68] 33.0% (990) 33.7% (1,012) 33.4% (2,002)
-#> (68,102] 32.7% (980) 33.3% (1,000) 33.0% (1,980)
-#> 0.4% (12) 0.6% (18) 0.5% (30)
-#> Total 100.0% (3,000) 100.0% (3,000) 100.0% (6,000)
-```
-
-Now we format them to insert the thousands commas. A tabyl’s raw Ns are
-stored in its `"core"` attribute. Here we retrieve those with `attr()`,
-then apply the base R function `format()` to all numeric columns.
-Lastly, we append these Ns using `adorn_ns()`.
-
-``` r
-formatted_ns <- attr(comparison, "core") %>% # extract the tabyl's underlying Ns
- adorn_totals(c("row", "col")) %>% # to match the data.frame we're appending to
- dplyr::mutate(across(where(is.numeric), ~ format(.x, big.mark = ",")))
-
-comparison %>%
- adorn_ns(position = "rear", ns = formatted_ns)
-#> agegroup f m Total
-#> (1,34] 33.9% (1,018) 32.3% ( 970) 33.1% (1,988)
-#> (34,68] 33.0% ( 990) 33.7% (1,012) 33.4% (2,002)
-#> (68,102] 32.7% ( 980) 33.3% (1,000) 33.0% (1,980)
-#> 0.4% ( 12) 0.6% ( 18) 0.5% ( 30)
-#> Total 100.0% (3,000) 100.0% (3,000) 100.0% (6,000)
-```
-
-### Questions? Comments?
-
-File [an issue on GitHub](https://github.com/sfirke/janitor/issues) if
-you have suggestions related to `tabyl()` and its `adorn_` helpers or
-encounter problems while using them.