- n = c(7, 10, 3, 10, 1, 1),
- percent = c("22%", "31%", "9%", "31%", "3%", "3%"),
- stringsAsFactors = FALSE
- )
- )
test_that("NAs are replaced with dashes when percentage signs are affixed", {
  # Case 1: NaNs coming out of adorn_percentages(), the more common
  # (though still uncommon) source of missing cells.
  carb_by_cyl <- tabyl(mtcars, carb, cyl)
  last_two_rows <- carb_by_cyl[5:6, ]
  has_nans <- untabyl(
    adorn_pct_formatting(adorn_percentages(last_two_rows, "col"))
  )
  row.names(has_nans) <- NULL

  expected_nans <- data.frame(
    carb = c(6, 8),
    `4` = c("-", "-"),
    `6` = c("100.0%", "0.0%"),
    `8` = c("0.0%", "100.0%"),
    check.names = FALSE,
    stringsAsFactors = FALSE
  )
  expect_equal(has_nans, expected_nans)

  # Case 2: a plain NA in a numeric column is rendered as "-".
  has_nas <- data.frame(
    a = c("big", "little"),
    x = c(0.1, 0.123),
    y = c(0.98, NA),
    stringsAsFactors = FALSE
  )
  expected_nas <- data.frame(
    a = c("big", "little"),
    x = c("10.0%", "12.3%"),
    y = c("98.0%", "-"),
    stringsAsFactors = FALSE
  )
  expect_equal(adorn_pct_formatting(has_nas), expected_nas)
})
test_that("NAs are replaced with dashes - no percentage signs affixed", {
  # Case 1: NaNs coming out of adorn_percentages(), formatted without "%".
  carb_by_cyl <- tabyl(mtcars, carb, cyl)
  last_two_rows <- carb_by_cyl[5:6, ]
  has_nans <- untabyl(
    adorn_pct_formatting(
      adorn_percentages(last_two_rows, "col"),
      affix_sign = FALSE
    )
  )
  row.names(has_nans) <- NULL

  expected_nans <- data.frame(
    carb = c(6, 8),
    `4` = c("-", "-"),
    `6` = c("100.0", "0.0"),
    `8` = c("0.0", "100.0"),
    check.names = FALSE,
    stringsAsFactors = FALSE
  )
  expect_equal(has_nans, expected_nans)

  # Case 2: a plain NA is converted to the "-" symbol.
  has_nas <- data.frame(
    a = c("big", "little"),
    x = c(0.1, 0.123),
    y = c(0.98, NA),
    stringsAsFactors = FALSE
  )
  expected_nas <- data.frame(
    a = c("big", "little"),
    x = c("10.0", "12.3"),
    y = c("98.0", "-"),
    stringsAsFactors = FALSE
  )
  expect_equal(adorn_pct_formatting(has_nas, affix_sign = FALSE), expected_nas)
})
test_that("bad rounding argument caught", {
  # An unsupported `rounding` value must fail with this exact message.
  expected_msg <- "`rounding` must be one of \"half to even\" or \"half up\", not \"blargh\"."
  expect_error(
    adorn_pct_formatting(adorn_percentages(dat), rounding = "blargh"),
    expected_msg,
    fixed = TRUE
  )
})
test_that("automatically invokes purrr::map when called on a 3-way tabyl", {
  three <- tabyl(mtcars, cyl, am, gear)

  # Vanilla call: result must equal mapping the function over each element.
  mapped_default <- purrr::map(three, adorn_pct_formatting)
  expect_equal(adorn_pct_formatting(three), mapped_default)

  # Arguments are passed through to every element.
  mapped_with_args <- purrr::map(three, adorn_pct_formatting, 2, "half up", FALSE)
  expect_equal(
    adorn_pct_formatting(three, 2, "half up", affix_sign = FALSE),
    mapped_with_args
  )
})
test_that("non-data.frame inputs are handled", {
  # A bare integer vector is neither a data.frame nor a list of data.frames.
  expect_error(
    adorn_pct_formatting(1:5),
    "adorn_pct_formatting() must be called on a data.frame or list of data.frames",
    fixed = TRUE
  )
})
test_that("tidyselecting works", {
  waves <- data.frame(
    color = c("green", "blue", "red"),
    first_wave = c(1:3),
    second_wave = c(4:6),
    third_wave = c(3, 3, 3),
    size = c("small", "medium", "large"),
    stringsAsFactors = FALSE
  )
  target <- adorn_percentages(waves)
  skip_msg <- "At least one non-numeric column was specified and will not be modified."

  # Only the selected columns are formatted; third_wave stays numeric.
  two_cols <- adorn_pct_formatting(target, , , , first_wave:second_wave)
  expect_equal(two_cols$first_wave, c("12.5%", "20.0%", "25.0%"))
  expect_equal(two_cols$third_wave, c(3 / 8, 3 / 10, 3 / 12))

  # Selecting a range that includes a text column triggers a message.
  expect_message(
    adorn_pct_formatting(target, , , , third_wave:size),
    skip_msg
  )

  # Correct behavior occurs when text columns are skipped.
  expect_message(
    text_skipped <- adorn_pct_formatting(target, , , , c(first_wave, size)),
    skip_msg
  )
  expect_equal(text_skipped$first_wave, c("12.5%", "20.0%", "25.0%"))
  expect_equal(
    dplyr::select(text_skipped, -first_wave),
    dplyr::select(target, -first_wave),
    ignore_attr = TRUE
  )
})
test_that("decimal.mark works", {
  # Switch the decimal separator for this test only. The previous value is
  # restored in `finally`, so an error raised while formatting cannot leak
  # OutDec = "," into tests that run afterwards.
  locale_decimal_sep <- getOption("OutDec")
  options(OutDec = ",")
  tryCatch(
    expect_true(
      all(grepl(",", unlist(adorn_pct_formatting(source1)[2])))
    ),
    finally = options(OutDec = locale_decimal_sep)
  )
})
-source1 <- mtcars %>%
- tabyl(cyl, am)
test_that("bad input to denominator arg is caught", {
  # "blargh" is not one of the accepted denominators.
  expected_msg <- "`denominator` must be one of \"row\", \"col\", or \"all\""
  expect_error(
    adorn_percentages(mtcars, "blargh"),
    expected_msg,
    fixed = TRUE
  )
})
-test_that("calculations are accurate", {
- expect_equal(
- untabyl(adorn_percentages(source1)), # default parameter is denominator = "row"
- data.frame(
- cyl = c(4, 6, 8),
- `0` = c(3 / 11, 4 / 7, 12 / 14),
- `1` = c(8 / 11, 3 / 7, 2 / 14),
- check.names = FALSE,
- stringsAsFactors = FALSE
- )
- )
- expect_equal(
- untabyl(adorn_percentages(source1, denominator = "col")),
- data.frame(
- cyl = c(4, 6, 8),
- `0` = c(3 / 19, 4 / 19, 12 / 19),
- `1` = c(8 / 13, 3 / 13, 2 / 13),
- check.names = FALSE,
- stringsAsFactors = FALSE
- )
- )
- expect_equal(
- untabyl(adorn_percentages(source1, denominator = "all")),
- data.frame(
- cyl = c(4, 6, 8),
- `0` = c(3 / 32, 4 / 32, 12 / 32),
- `1` = c(8 / 32, 3 / 32, 2 / 32),
- check.names = FALSE,
- stringsAsFactors = FALSE
- )
- )
-source2 <- source1 %>%
- adorn_totals(c("row", "col"))
-test_that("calculations are correct when totals row/col doesn't match axis of computation", {
- expect_equal(
- untabyl(adorn_percentages(source2, denominator = "row")),
- data.frame(
- cyl = c(4, 6, 8, "Total"),
- `0` = c(3 / 11, 4 / 7, 12 / 14, 19 / 32),
- `1` = c(8 / 11, 3 / 7, 2 / 14, 13 / 32),
- Total = c(1, 1, 1, 1),
- check.names = FALSE,
- stringsAsFactors = FALSE
- )
- )
-test_that("works with totals row/col when denominator = col or all, #357", {
- col_percs <- source1 %>%
- adorn_totals(where = c("col", "row")) %>%
- adorn_percentages(denominator = "col")
- expect_equal(col_percs$Total, c(11, 7, 14, 32) / 32)
- expect_equal(unname(unlist(col_percs[4, ])), c("Total", rep(1, 3)))
- # Same but for denominator = all
- all_percs <- source1 %>%
- adorn_totals(where = c("col", "row")) %>%
- adorn_percentages(denominator = "all")
- expect_equal(all_percs$Total, c(11, 7, 14, 32) / 32)
- expect_equal(unname(unlist(all_percs[4, ])), unname(c("Total", colSums(source1)[2:3] / 32, 32 / 32)))
- # Now with no totals row, same two tests as preceding
- col_percs_no_row <- source1 %>%
- adorn_totals(where = c("col")) %>%
- adorn_percentages(denominator = "col")
- expect_equal(col_percs_no_row$Total, c(11, 7, 14) / 32)
- # Same but for denominator = all
- all_percs_no_row <- source1 %>%
- adorn_totals(where = c("col")) %>%
- adorn_percentages(denominator = "all")
- expect_equal(all_percs_no_row$Total, c(11, 7, 14) / 32)
- # And try one where we exempt the totals col
- expect_message(
- col_percs_exempted <- source1 %>%
- adorn_totals(where = c("col", "row")) %>%
- adorn_percentages(denominator = "col", , -Total),
- regexp = "At least one non-numeric column was specified. All non-numeric columns will be removed from percentage calculations."
- )
- expect_equal(col_percs_exempted$Total, c(11, 7, 14, 32))
- expect_equal(unname(unlist(col_percs_exempted[4, ])), c("Total", 1, 1, 32))
- expect_message(
- all_percs_exempted <- source1 %>%
- adorn_totals(where = c("col", "row")) %>%
- adorn_percentages(denominator = "all", , -Total),
- regexp = "At least one non-numeric column was specified. All non-numeric columns will be removed from percentage calculations."
- )
- expect_equal(all_percs_exempted$Total, c(11, 7, 14, 32))
- expect_equal(unname(unlist(all_percs_exempted[4, ])), unname(c("Total", colSums(source1)[2:3] / 32, 32)))
-source2 <- source1
-source2[2, 2] <- NA
-test_that("NAs handled correctly with na.rm = TRUE", {
- expect_equal(
- untabyl(adorn_percentages(source2)), # row
- data.frame(
- cyl = c(4, 6, 8),
- `0` = c(3 / 11, NA, 12 / 14),
- `1` = c(8 / 11, 1, 2 / 14),
- check.names = FALSE,
- stringsAsFactors = FALSE
- )
- )
- expect_equal(
- untabyl(adorn_percentages(source2, denominator = "col")),
- data.frame(
- cyl = c(4, 6, 8),
- `0` = c(3 / 15, NA, 12 / 15),
- `1` = c(8 / 13, 3 / 13, 2 / 13),
- check.names = FALSE,
- stringsAsFactors = FALSE
- )
- )
-test_that("NAs handled correctly with na.rm = FALSE", {
- expect_equal(
- untabyl(adorn_percentages(source2, na.rm = FALSE)), # row
- data.frame(
- cyl = c(4, 6, 8),
- `0` = c(3 / 11, NA, 12 / 14),
- `1` = c(8 / 11, NA, 2 / 14),
- check.names = FALSE,
- stringsAsFactors = FALSE
- )
- )
- expect_equal(
- untabyl(adorn_percentages(source2, denominator = "col", na.rm = FALSE)),
- data.frame(
- cyl = c(4, 6, 8),
- `0` = as.numeric(c(NA, NA, NA)),
- `1` = c(8 / 13, 3 / 13, 2 / 13),
- check.names = FALSE,
- stringsAsFactors = FALSE
- )
- )
test_that("data.frames with no numeric columns beyond the first cause failure", {
  # Only column `a` is numeric, and it is the first column.
  no_numeric_after_first <- data.frame(a = 1:2, b = c("hi", "lo"))
  expect_error(
    adorn_percentages(no_numeric_after_first),
    "at least one one of columns 2:n must be of class numeric"
  )
})
-test_that("works with a single numeric column per #89", {
- dat <- data.frame(Operation = c("Login", "Posted", "Deleted"), `Total Count` = c(5, 25, 40), check.names = FALSE)
- expect_equal(
- dat %>% adorn_percentages("col") %>% untabyl(),
- data.frame(
- Operation = c("Login", "Posted", "Deleted"),
- `Total Count` = c(5 / 70, 25 / 70, 40 / 70),
- check.names = FALSE
- )
- )
-test_that("works with totals row", {
- dat <- data.frame(Operation = c("Login", "Posted", "Deleted"), `Total Count` = c(5, 25, 40), check.names = FALSE)
- expect_equal(
- dat %>% adorn_totals("row") %>% adorn_percentages("col") %>% untabyl(),
- data.frame(
- Operation = c("Login", "Posted", "Deleted", "Total"),
- `Total Count` = c(5 / 70, 25 / 70, 40 / 70, 1),
- check.names = FALSE, stringsAsFactors = FALSE
- )
- )
-test_that("automatically invokes purrr::map when called on a 3-way tabyl", {
- three <- tabyl(mtcars, cyl, am, gear)
- expect_equal(
- adorn_percentages(three), # vanilla call
- purrr::map(three, adorn_percentages)
- )
- # with arguments passing through
- expect_equal(
- adorn_percentages(three, "col", na.rm = FALSE),
- purrr::map(three, adorn_percentages, "col", FALSE)
- )
test_that("non-data.frame inputs are handled", {
  # A bare integer vector is neither a data.frame nor a list of data.frames.
  expect_error(
    adorn_percentages(1:5),
    "adorn_percentages() must be called on a data.frame or list of data.frames",
    fixed = TRUE
  )
})
test_that("tidyselecting works", {
  target <- data.frame(
    color = c("green", "blue", "red"),
    first_wave = c(1:3),
    second_wave = c(4:6),
    third_wave = c(3, 3, 3),
    size = c("small", "medium", "large"),
    stringsAsFactors = FALSE
  )

  # Only the selected columns are converted, so the row denominators come
  # from first_wave + second_wave alone and third_wave is left untouched.
  two_cols <- target %>%
    adorn_percentages(, , , first_wave:second_wave)
  expect_equal(two_cols$first_wave, c(1 / 5, 2 / 7, 3 / 9))
  expect_equal(two_cols$third_wave, rep(3, 3))

  # Selecting a text column emits a message. The call is made once: the same
  # expectation checks the message and captures the result for the value
  # checks below.
  expect_message(
    text_skipped <- target %>%
      adorn_percentages(., "col", , c(first_wave, size)),
    regexp = "At least one non-numeric column was specified. All non-numeric columns will be removed from percentage calculations."
  )
  expect_equal(text_skipped$first_wave, target$first_wave / sum(target$first_wave))
  expect_equal(
    text_skipped %>% dplyr::select(-first_wave),
    target %>% dplyr::select(-first_wave),
    ignore_attr = TRUE
  )

  # Check combination of totals and tidyselecting does not modify totals col
  totaled <- target %>%
    adorn_totals("col", , , , second_wave:third_wave) %>%
    adorn_percentages(, , , second_wave:third_wave)
  expect_equal(totaled$Total, 7:9)
})
# Fixture: 100 rows whose a/b cross-tabulation has counts 50, 5, 0 and 45.
x <- data.frame(
  a = c(rep("x", 55), rep("y", 45)),
  b = c(rep("x", 50), rep("y", 50)),
  stringsAsFactors = FALSE
)

# Crosstab with decimal values ending in .5
y <- x %>%
  tabyl(a, b) %>%
  adorn_percentages("all")
-test_that("rounding parameter works", {
- expect_equal(
- y %>%
- adorn_rounding(digits = 1, rounding = "half up") %>%
- untabyl(),
- data.frame(
- a = c("x", "y"),
- x = c(0.5, 0.0),
- y = c(0.1, 0.5),
- stringsAsFactors = FALSE
- )
- )
- # Test failing on CRAN and only there
- skip_on_cran()
- expect_equal(
- y %>%
- adorn_rounding(digits = 1) %>% # default rounding: "half to even"
- untabyl(),
- data.frame(
- a = c("x", "y"),
- x = c(0.5, 0.0),
- y = c(0.0, 0.4),
- stringsAsFactors = FALSE
- )
- )
-test_that("digit control succeeds", {
- expect_equal(
- y %>%
- adorn_rounding(digits = 0, rounding = "half up") %>%
- untabyl(),
- data.frame(
- a = c("x", "y"),
- x = c(1, 0),
- y = c(0, 0),
- stringsAsFactors = FALSE
- )
- )
- expect_equal(
- y %>%
- adorn_rounding(digits = 2, rounding = "half up"), # shouldn't do anything given the input only having 2 decimal places
- y
- )
test_that("bad rounding argument caught", {
  # An unsupported `rounding` value must be rejected.
  expected_msg <- "'rounding' must be one of 'half to even' or 'half up'"
  expect_error(
    adorn_rounding(y, rounding = "blargh"),
    expected_msg,
    fixed = TRUE
  )
})
-test_that("works when called on a 3-way tabyl", {
- triple <- mtcars %>%
- tabyl(am, cyl, vs) %>%
- adorn_percentages("row")
- triple_rounded_manual <- triple
- triple_rounded_manual[[1]] <- adorn_rounding(triple[[1]])
- triple_rounded_manual[[2]] <- adorn_rounding(triple[[2]])
- expect_equal(
- triple %>%
- adorn_rounding(),
- triple_rounded_manual
- )
-test_that("tidyselecting works", {
- target <- data.frame(
- color = c("green", "blue", "red"),
- first_wave = c(1:3),
- second_wave = c(4:6),
- third_wave = c(3, 3, 3),
- size = c("small", "medium", "large"),
- stringsAsFactors = FALSE
- ) %>%
- adorn_percentages()
- two_cols <- target %>%
- adorn_rounding(, "half up", first_wave:second_wave)
- expect_equal(two_cols$first_wave, c(.1, .2, .3))
- expect_equal(two_cols$third_wave, c(3 / 8, 3 / 10, 3 / 12))
- expect_message(
- target %>%
- adorn_rounding(, , third_wave:size),
- "At least one non-numeric column was specified and will not be modified."
- )
- expect_message(
- text_skipped <- target %>%
- adorn_rounding(, , c(first_wave, size)),
- "At least one non-numeric column was specified and will not be modified."
- )
- expect_equal(text_skipped$first_wave, c(.1, .2, .2))
- expect_equal(
- text_skipped %>% dplyr::select(-first_wave),
- target %>% dplyr::select(-first_wave),
- ignore_attr = TRUE
- )
test_that("non-data.frame inputs are handled", {
  # A bare integer vector is neither a data.frame nor a list of data.frames.
  expect_error(
    adorn_rounding(1:5),
    "adorn_rounding() must be called on a data.frame or list of data.frames",
    fixed = TRUE
  )
})
-source1 <- mtcars %>%
- tabyl(gear, cyl)
-test_that("placement is correct", {
- # Top
- expect_equal(
- source1 %>%
- adorn_title() %>%
- names(),
- c("", "cyl", rep("", 2))
- )
- expect_equal(
- source1 %>%
- adorn_title() %>%
- .[1, ] %>%
- unlist() %>%
- unname(),
- c("gear", "4", "6", "8")
- )
- # Combined
- expect_equal(
- source1 %>%
- adorn_title("combined") %>%
- names(),
- c("gear/cyl", "4", "6", "8")
- )
-test_that("name overrides work", {
- expect_equal(
- source1 %>%
- adorn_title(row_name = "R", col_name = "C") %>%
- names(),
- c("", "C", rep("", 2))
- )
-test_that("non-tabyls are treated correctly", {
- non_tab <- mtcars %>%
- dplyr::count(gear, cyl) %>%
- tidyr::pivot_wider(names_from = gear, values_from = n)
- expect_error(adorn_title(non_tab), "When input is not a data.frame of class tabyl, a value must be specified for the col_name argument")
- expect_equal(
- non_tab %>% adorn_title(col_name = "col") %>% names(),
- c("", "col", rep("", 2))
- )
- expect_equal(
- non_tab %>% adorn_title(placement = "combined", col_name = "col") %>% names(),
- c("cyl/col", 3, 4, 5)
- )
- expect_equal(
- non_tab %>% adorn_title(placement = "combined", row_name = "row!", col_name = "col") %>% names(),
- c("row!/col", 3, 4, 5)
- )
-test_that("bad inputs are caught", {
- expect_error(adorn_title(1:2),
- "\"dat\" must be a data.frame",
- fixed = TRUE
- )
- expect_error(
- adorn_title(source1,
- placement = "blargh"
- ),
- "`placement` must be one of \"top\" or \"combined\"",
- fixed = TRUE
- )
- expect_error(
- adorn_title(source1,
- row_name = 1:4
- ),
- "row_name must be a string"
- )
- expect_error(
- adorn_title(source1,
- col_name = mtcars
- ),
- "col_name must be a string"
- )
- # Doesn't make sense with a one-way tabyl
- expect_warning(
- mtcars %>% tabyl(cyl) %>% adorn_title(),
- "adorn_title is meant for two-way tabyls, calling it on a one-way tabyl may not yield a meaningful result"
- )
-test_that("works with non-count inputs", {
- source2_base <- data.frame(sector = c("North", "South"), units = 1:2, group = c("a", "b"))
- source2_tibble <- dplyr::as_tibble(source2_base)
- expect_equal(
- adorn_title(source2_base, col_name = "Characteristics") %>% names(),
- c("", "Characteristics", "")
- )
- expect_equal(
- adorn_title(source2_base, col_name = "Characteristics"),
- adorn_title(source2_tibble, col_name = "Characteristics")
- )
-test_that("for printing purposes: tabyl class stays tabyl, data.frame stays data.frame, tibble is downgraded to data.frame", {
- # right output classes with tabyl inputs
- expect_equal(class(mtcars %>% tabyl(cyl, am) %>% adorn_title()), c("tabyl", "data.frame"))
- expect_equal(class(mtcars %>% tabyl(gear, carb) %>% adorn_title(., "combined")), c("tabyl", "data.frame"))
- # Create tibble input:
- mpg_by_cyl_and_am <-
- mtcars %>%
- dplyr::group_by(cyl, am) %>%
- dplyr::summarise(mean_mpg = mean(mpg)) %>%
- tidyr::pivot_wider(names_from = am, values_from = mean_mpg)
- # handles tibble input
- expect_s3_class(
- mpg_by_cyl_and_am %>% adorn_title("top", "Cylinders", "Automatic?"),
- "data.frame"
- )
- # Convert columns 2:n to strings
- expect_s3_class(
- mpg_by_cyl_and_am %>% adorn_pct_formatting() %>% # nonsense command here, just want to convert cols 2:n into character
- adorn_title("top", "Cylinders", "Automatic?"),
- "data.frame"
- )
- # handles data.frame non-tabyl input
- expect_s3_class(
- mtcars %>% adorn_title("top", col_name = "hey look ma I'm a title"),
- "data.frame"
- )
-test_that("works with factors in input", {
- facts <- data.frame(a = "high", large = "1", stringsAsFactors = TRUE)
- # first with "top" then "combined"
- expect_equal(
- facts %>% adorn_title(col_name = "col"),
- data.frame(a = c("a", "high"), col = c("large", "1"), stringsAsFactors = FALSE) %>%
- setNames(., c("", "col"))
- )
- # with combined the original column types are preserved
- expect_equal(
- facts %>% adorn_title("combined", col_name = "col"),
- data.frame(`a/col` = "high", large = "1", stringsAsFactors = TRUE, check.names = FALSE)
- )
-test_that("automatically invokes purrr::map when called on a 3-way tabyl", {
- three <- tabyl(mtcars, cyl, am, gear) %>%
- adorn_percentages() %>%
- adorn_pct_formatting()
- expect_equal(
- adorn_title(three), # vanilla call
- purrr::map(three, adorn_title)
- )
- # with arguments passing through, incl. custom row and col names
- expect_equal(
- adorn_title(three, "combined", "cyl", "am"),
- purrr::map(three, adorn_title, "combined", "cyl", "am")
- )
# Fixture: nine rows; `a` is a factor whose level order ("small", "big")
# differs from alphabetical order.
dat <- data.frame(
  a = factor(c(rep(c("big", "small", "big"), 3)), levels = c("small", "big")),
  b = c(1:3, 1:3, 1, 1, 1)
)

# Two-way crosstab of `a` by `b`.
ct <- dat %>%
  tabyl(a, b)

# Fixture: data frame alternating integer and character columns.
mixed <- data.frame(
  a = 1:3,
  b = c("x", "y", "z"),
  c = 5:7,
  d = c("big", "med", "small"),
  stringsAsFactors = FALSE
)
-test_that("totals row is correct", {
- expect_equal(
- untabyl(adorn_totals(ct, "row")),
- data.frame(
- a = factor(c("small", "big", "Total"), levels = c("small", "big", "Total")),
- `1` = c(1, 4, 5),
- `2` = c(2, 0, 2),
- `3` = c(0, 2, 2),
- check.names = FALSE
- )
- )
-test_that("totals col is correct", {
- expect_equal(
- untabyl(adorn_totals(ct, "col")),
- data.frame(
- a = factor(c("small", "big"), levels = c("small", "big")),
- `1` = c(1, 4),
- `2` = c(2, 0),
- `3` = c(0, 2),
- Total = c(3, 6),
- check.names = FALSE
- )
- )
-test_that("totals row and col produce correct results when called together", {
- expect_equal(
- ct %>%
- adorn_totals(c("row", "col")) %>%
- untabyl(),
- data.frame(
- a = factor(c("small", "big", "Total"), levels = c("small", "big", "Total")),
- `1` = c(1, 4, 5),
- `2` = c(2, 0, 2),
- `3` = c(0, 2, 2),
- Total = c(3, 6, 9),
- check.names = FALSE
- )
- )
-test_that("totals where='both' produce equivalent results to c('row','col')", {
- expect_equal(
- ct %>%
- adorn_totals("both") %>%
- untabyl(),
- ct %>%
- adorn_totals(c("row", "col")) %>%
- untabyl()
- )
-test_that("order doesn't matter when row and col are called together", {
- expect_equal(
- ct %>%
- adorn_totals(c("row", "col")) %>%
- untabyl(),
- ct %>%
- adorn_totals(c("col", "row")) %>%
- untabyl()
- )
test_that("both functions work with a single column", {
  # One Date column followed by a single numeric column.
  single_col <- tibble::tibble(
    a = c(as.Date("2016-01-01"), as.Date("2016-02-03")),
    b = c(1, 2)
  )

  # expect_no_error() states the intent directly; it replaces the older
  # expect_error(x, NA) idiom for asserting that a call succeeds.
  expect_no_error(single_col %>% adorn_totals("row"))
  expect_no_error(single_col %>% adorn_totals("col"))
  expect_no_error(single_col %>% adorn_totals(c("col", "row")))
})
# Fixture: two rows mixing character and numeric columns; column `e`
# contains an NA.
dat <- data.frame(
  a = c("hi", "lo"),
  b = c(1, 2),
  c = c(5, 10),
  d = c("big", "small"),
  e = c(20, NA),
  stringsAsFactors = FALSE
)
-test_that("numeric first column is ignored", {
- expect_equal(
- mtcars %>%
- tabyl(cyl, gear) %>%
- adorn_totals("col") %>%
- untabyl(),
- data.frame(
- cyl = c(4, 6, 8),
- `3` = c(1, 2, 12),
- `4` = c(8, 4, 0),
- `5` = c(2, 1, 2),
- Total = c(11, 7, 14),
- check.names = FALSE,
- stringsAsFactors = FALSE
- )
- )
-# create input tables for subsequent testing
-ct_2 <-
- mtcars %>%
- dplyr::group_by(cyl, gear) %>%
- dplyr::tally() %>%
- tidyr::pivot_wider(names_from = gear, values_from = n)
-df1 <- data.frame(x = c(1, 2), y = c(NA, 4))
-test_that("grouped_df gets ungrouped and succeeds", {
- ct_2 <-
- mtcars %>%
- dplyr::group_by(cyl, gear) %>%
- dplyr::tally() %>%
- tidyr::pivot_wider(names_from = gear, values_from = n)
- expect_equal(
- ct_2 %>% adorn_totals(),
- ct_2 %>% dplyr::ungroup() %>% adorn_totals()
- )
-test_that("na.rm value works correctly", {
- expect_equal(
- df1 %>% adorn_totals(c("row", "col"), na.rm = FALSE) %>% untabyl(),
- data.frame(
- x = c("1", "2", "Total"),
- y = c(NA, 4, NA),
- Total = c(NA, 4, NA),
- stringsAsFactors = FALSE
- )
- )
-test_that("add_totals respects if input was data.frame", {
- expect_equal(
- class(df1),
- class(df1 %>% adorn_totals() %>% untabyl())
- )
-test_that("add_totals respects if input was tibble", {
- expect_equal(
- class(df1 %>% tibble::as_tibble()),
- class(df1 %>% tibble::as_tibble() %>% adorn_totals() %>% untabyl())
- )
test_that("error thrown if no columns past first are numeric", {
  # Every case below is expected to fail with the same message.
  no_numeric_msg <- "at least one targeted column must be of class numeric. Control target variables with the ... argument. adorn_totals should be called before other adorn_ functions."

  # No numeric columns at all.
  df2 <- data.frame(
    x = c("big", "small"),
    y = c("hi", "lo")
  )
  expect_error(adorn_totals(df2, "col"), no_numeric_msg)

  # Only the character column `d` is targeted.
  expect_error(
    adorn_totals(mixed, "row", "-", TRUE, "Totals", d),
    no_numeric_msg
  )

  # Only the first column is numeric.
  df3 <- data.frame(
    x = 1:2,
    y = c("hi", "lo")
  )
  expect_error(adorn_totals(df3), no_numeric_msg)
})
test_that("bad input to where arg is caught", {
  # "blargh" is not an accepted value for `where`.
  expected_msg <- '"where" must be one of "row", "col", or c("row", "col")'
  expect_error(
    adorn_totals(mtcars, "blargh"),
    expected_msg,
    fixed = TRUE
  )
})
-test_that("works with non-numeric columns mixed in; fill character specification", {
- expect_equal(
- mixed %>% adorn_totals(where = c("row", "col"), fill = "*") %>% untabyl(),
- data.frame(
- a = c("1", "2", "3", "Total"),
- b = c("x", "y", "z", "*"),
- c = c(5, 6, 7, 18),
- d = c("big", "med", "small", "*"),
- Total = c(5, 6, 7, 18),
- stringsAsFactors = FALSE
- )
- )
-test_that("fill works with multiple factor and date columns", {
- has_facs <- data.frame(
- a = c("hi", "low"),
- b = c("big", "small"),
- c = c(as.Date("2000-01-01"), as.Date("2000-01-02")),
- d = 1:2
- )
- expect_equal(
- adorn_totals(has_facs, "row") %>% untabyl(),
- data.frame(
- a = c("hi", "low", "Total"),
- b = c("big", "small", "-"),
- c = c("2000-01-01", "2000-01-02", "-"),
- d = 1:3,
- stringsAsFactors = FALSE
- )
- )
-test_that("totals attributes are assigned correctly", {
- post <- adorn_totals(ct, c("row", "col"))
- expect_equal(attr(post, "totals"), c("row", "col"))
- expect_equal(class(post), c("tabyl", "data.frame"))
- expect_equal(attr(post, "tabyl_type"), "two_way")
- expect_equal(attr(post, "core"), untabyl(ct))
- post_col <- adorn_totals(ct, "col")
- expect_equal(attr(post_col, "totals"), "col")
- expect_equal(class(post_col), c("tabyl", "data.frame"))
- expect_equal(attr(post_col, "tabyl_type"), "two_way")
- expect_equal(attr(post_col, "core"), untabyl(ct))
- post_sequential_both <- adorn_totals(ct, "col") %>%
- adorn_totals("row")
- expect_equal(post_sequential_both, post, ignore_attr = TRUE)
- expect_equal(
- sort(attr(post, "totals")),
- sort(attr(post_sequential_both, "totals"))
- )
test_that("trying to re-adorn a dimension fails", {
  readd_msg <- "trying to re-add a totals dimension that is already been added"

  # Adding a totals column twice.
  with_col <- adorn_totals(ct, "col")
  expect_error(adorn_totals(with_col, "col"), readd_msg)

  # Calling adorn_totals() with defaults twice.
  expect_error(adorn_totals(adorn_totals(ct)), readd_msg)
})
-test_that("automatically invokes purrr::map when called on a 3-way tabyl", {
- three <- tabyl(mtcars, cyl, am, gear)
- expect_equal(
- adorn_totals(three), # vanilla call
- purrr::map(three, adorn_totals)
- )
- # with arguments passing through
- expect_equal(
- adorn_totals(three, c("row", "col"), fill = "---", na.rm = FALSE, name = "dummy_name"),
- purrr::map(three, adorn_totals, c("row", "col"), fill = "---", FALSE, name = "dummy_name")
- )
test_that("non-data.frame inputs are handled", {
  # A bare integer vector is neither a data.frame nor a list of data.frames.
  expect_error(
    adorn_totals(1:5),
    "adorn_totals() must be called on a data.frame or list of data.frames",
    fixed = TRUE
  )
})
test_that("row total name is changed", {
  # The label sits in the first column of the row appended after the data.
  renamed <- adorn_totals(ct, name = "NewTitle")
  totals_label <- renamed[nrow(ct) + 1, 1]
  expect_equal(as.character(totals_label), "NewTitle")
})
test_that("column total name is changed", {
  # The totals column is appended after the original columns.
  renamed <- adorn_totals(ct, where = "col", name = "NewTitle")
  totals_col_name <- colnames(renamed)[ncol(ct) + 1]
  expect_equal(totals_col_name, "NewTitle")
})
test_that("tidyselecting works", {
  # Total only the cyl and gear columns of mtcars, in both directions.
  cyl_gear <- mtcars %>%
    adorn_totals(c("row", "col"), "-", TRUE, "cylgear", c(cyl, gear))
  expect_equal(cyl_gear$cylgear, c(mtcars$cyl + mtcars$gear, (sum(mtcars$cyl) + sum(mtcars$gear))))
  expect_equal(
    unname(unlist(cyl_gear[33, ])),
    c("cylgear", "198", rep("-", 7), "118", "-", "316")
  )

  # Can override the first column not being included
  # adorn_totals() still fails if ONLY the first column is numeric, that's fine - it's a nonsensical operation
  simple <- data.frame(
    x = 1:2,
    y = 3:4,
    z = c("hi", "lo")
  )

  # The call is made once: the same expectation checks the message and
  # captures the result for the value checks below.
  expect_message(
    simple_total <- simple %>%
      adorn_totals(c("row", "col"), "-", TRUE, "Total", x),
    regexp = "Because the first column was specified to be totaled, it does not contain the label 'Total' (or user-specified name) in the totals row",
    fixed = TRUE
  )
  expect_equal(unname(unlist(simple_total[3, ])), c("3", "-", "-", "3"))
  expect_equal(simple_total$Total, 1:3)

  # test that leaving out a numeric column of a tibble succeeds, #388
  expect_equal(
    simple %>%
      adorn_totals(, , , , y) %>%
      as.data.frame(),
    simple %>%
      tibble::tibble() %>%
      adorn_totals() %>%
      as.data.frame()
  )
})
-test_that("supplying NA to fill preserves column types", {
- test_df <- data.frame(
- a = c("hi", "low", "med"),
- b = factor(c("big", "small", "regular")),
- c = c(as.Date("2000-01-01"), as.Date("2000-01-02"), as.Date("2000-01-03")),
- d = c(as.POSIXct("2000-01-01", tz = "ROK"), as.POSIXct("2000-01-02"), as.POSIXct("2000-01-03")),
- e = 1:3,
- f = 4:6,
- g = c(TRUE, FALSE, TRUE),
- h = c(7.2, 8.2, 9.2),
- stringsAsFactors = FALSE
- )
- out <- adorn_totals(test_df, fill = NA)
- # expect types to be preserved
- expect_type(out[["a"]], "character")
- expect_s3_class(out[["b"]], "factor")
- expect_s3_class(out[["c"]], "Date")
- expect_s3_class(out[["d"]], "POSIXct")
- expect_type(out[["g"]], "logical")
- # expect factor levels to be preserved
- expect_equal(levels(out[["b"]]), levels(test_df[["b"]]))
- # expect NAs in total rows for non-numerics
- expect_true(is.na(out[4, "b"]))
- expect_true(is.na(out[4, "c"]))
- expect_true(is.na(out[4, "d"]))
- expect_true(is.na(out[4, "g"]))
- # test values of totals
- expect_equal(out[4, "a"], "Total")
- expect_equal(out[4, "e"], 6)
- expect_equal(out[4, "f"], 15)
- expect_equal(out[4, "h"], 24.6)
- # expect original df intact
- expect_equal(test_df, out[1:3, ], ignore_attr = TRUE)
-test_that("supplying NA as fill still works with non-character first col and numeric non-totaled cols", {
- test_df <- data.frame(
- a = factor(c("hi", "low", "med"), levels = c("low", "med", "hi")),
- b = factor(c("big", "small", "regular")),
- c = c(as.Date("2000-01-01"), as.Date("2000-01-02"), as.Date("2000-01-03")),
- d = 1:3,
- e = 4:6,
- f = c(TRUE, FALSE, TRUE),
- g = c(7.2, 8.2, 9.2),
- stringsAsFactors = FALSE
- )
- out <- adorn_totals(test_df,
- where = "row",
- fill = NA,
- na.rm = TRUE,
- name = "Total",
- d, e
- )
- expect_equal(out[["a"]], factor(c("hi", "low", "med", "Total"), levels = c("low", "med", "hi", "Total")))
- expect_equal(out[["g"]], c(7.2, 8.2, 9.2, NA_real_))
- expect_equal(out[4, "d"], 6)
- expect_equal(out[4, "e"], 15)
- expect_equal(test_df[1:3, 2:7], out[1:3, 2:7], ignore_attr = TRUE)
test_that("one_way tabyl inputs retain that class", {
  # Adding totals in both directions must not change the tabyl_type attribute.
  totaled <- adorn_totals(tabyl(mtcars, am), "both")
  expect_equal(attr(totaled, "tabyl_type"), "one_way")
})
-# Tests from #413, different values for row and col names
-test_that("long vectors are trimmed", {
- expect_equal(
- mixed %>%
- adorn_totals(
- where = "row",
- name = c("total", "something_else"),
- fill = "-"
- ) %>%
- untabyl(),
- data.frame(
- a = c(as.character(1:3), "total"),
- b = c("x", "y", "z", "-"),
- c = c(5:7, 18),
- d = c("big", "med", "small", "-"),
- stringsAsFactors = FALSE
- )
- )
-test_that("row and column names are taken correctly from a vector", {
- expect_equal(
- mixed %>%
- adorn_totals(
- where = "both",
- name = c("row_name", "col_name"),
- fill = "-"
- ) %>%
- untabyl(),
- data.frame(
- a = c(as.character(1:3), "row_name"),
- b = c("x", "y", "z", "-"),
- c = c(5, 6, 7, 18),
- d = c("big", "med", "small", "-"),
- col_name = c(5, 6, 7, 18),
- stringsAsFactors = FALSE
- )
- )
-test_that("row and column names are taken correctly from a single name", {
- expect_equal(
- mixed %>%
- adorn_totals(
- where = "both",
- name = "totals",
- fill = "-"
- ) %>%
- untabyl(),
- data.frame(
- a = c(as.character(1:3), "totals"),
- b = c("x", "y", "z", "-"),
- c = c(5, 6, 7, 18),
- d = c("big", "med", "small", "-"),
- totals = c(5, 6, 7, 18),
- stringsAsFactors = FALSE
- )
- )
# Fixture: nine rows; strings are converted to factors.
dat <- data.frame(
  a = c(rep(c("big", "small", "big"), 3)),
  b = c(1:3, 1:3, 1, 1, 1),
  stringsAsFactors = TRUE
)

# Two-way crosstab of `a` by `b`.
ct <- dat %>%
  tabyl(a, b)

# Fixture: data frame alternating integer and character columns.
mixed <- data.frame(
  a = 1:3,
  b = c("x", "y", "z"),
  c = 5:7,
  d = c("big", "med", "small"),
  stringsAsFactors = FALSE
)
-test_that("long vectors are trimmed", {
- expect_equal(
- mixed %>%
- adorn_totals(
- where = "row",
- name = c("total", "row_total"),
- fill = "-"
- ) %>%
- untabyl(),
- data.frame(
- a = c(as.character(1:3), "total"),
- b = c("x", "y", "z", "-"),
- c = c(5:7, 18),
- d = c("big", "med", "small", "-"),
- stringsAsFactors = FALSE
- )
- )
-test_that("row and column names are taken correctly from a vector", {
- expect_equal(
- mixed %>%
- adorn_totals(
- where = "both",
- name = c("column_totals", "row_totals"),
- fill = "-"
- ) %>%
- untabyl(),
- data.frame(
- a = c(as.character(1:3), "column_totals"),
- b = c("x", "y", "z", "-"),
- c = c(5, 6, 7, 18),
- d = c("big", "med", "small", "-"),
- row_totals = c(5, 6, 7, 18),
- stringsAsFactors = FALSE
- )
- )
-test_that("row and column names are taken correctly from a single name", {
- expect_equal(
- mixed %>%
- adorn_totals(
- where = "both",
- name = "totals",
- fill = "-"
- ) %>%
- untabyl(),
- data.frame(
- a = c(as.character(1:3), "totals"),
- b = c("x", "y", "z", "-"),
- c = c(5, 6, 7, 18),
- d = c("big", "med", "small", "-"),
- totals = c(5, 6, 7, 18),
- stringsAsFactors = FALSE
- )
- )
-test_that("order is maintained when first column is a factor, #494", {
- o <- data.frame(
- a = 1:5,
- fac = factor(c("orange", "blue", "orange", "orange", "blue")),
- ord = ordered(
- c("huge", "medium", "small", "medium", "medium"),
- levels = c("small", "medium", "huge")
- )
- )
- o_tabyl_totaled <- o %>%
- tabyl(ord, a) %>%
- adorn_totals("both")
- expect_equal(
- attr(o_tabyl_totaled$ord, "levels"),
- c("small", "medium", "huge", "Total")
- )
- expect_equal(
- class(o_tabyl_totaled$ord),
- c("ordered", "factor")
- )
- f_tabyl_totaled <- o %>%
- tabyl(fac, a) %>%
- adorn_totals("both")
- expect_equal(
- attr(f_tabyl_totaled$fac, "levels"),
- c("blue", "orange", "Total")
- )
- expect_equal(
- class(f_tabyl_totaled$fac),
- "factor"
- )
-test_that("if factor level already present, adorn_totals() still works, #529", {
- factor_present <- mtcars %>%
- tabyl(am, cyl)
- factor_present$am <- factor(factor_present$am, levels = c("0", "1", "Total"))
- expect_equal(
- levels(adorn_totals(factor_present, "row")$am),
- c("0", "1", "Total")
- )
-# Tests the get_level_groups helper function called by top_levels()
-shorts <- factor(c("a", "b", "c", "d", "e", "f"), levels = rev(letters[1:6]))
-longs <- factor(c("aaaaaaaaaaaaaaaa", "bbbbbbbbbbbbbbbbb", "cccccccccccccccccccc", "dddddddddddddddd", NA, "hhhhhhhhhhhhhhhh", "bbbbbbbbbbbbbbbbb"), levels = c("dddddddddddddddd", "aaaaaaaaaaaaaaaa", "cccccccccccccccccccc", "bbbbbbbbbbbbbbbbb", "hhhhhhhhhhhhhhhh"))
-short1 <- get_level_groups(shorts, 1, max(as.numeric(shorts), na.rm = TRUE))
-short2 <- get_level_groups(shorts, 2, max(as.numeric(shorts), na.rm = TRUE))
-short3 <- get_level_groups(shorts, 3, max(as.numeric(shorts), na.rm = TRUE))
-test_that("names are grouped properly and groups are ordered correctly", {
- expect_equal(short1, list(top = "f", mid = "e, d, c, b", bot = "a"))
- expect_equal(short2, list(top = "f, e", mid = c("d, c"), bot = "b, a"))
- expect_equal(short3, list(top = "f, e, d", mid = NA, bot = "c, b, a"))
-long1 <- get_level_groups(longs, 1, max(as.numeric(longs), na.rm = TRUE))
-long2 <- get_level_groups(longs, 2, max(as.numeric(longs), na.rm = TRUE))
-test_that("truncation works correctly", {
- expect_equal(long1, list(top = "dddddddddddddddd", mid = "<<< Middle Group (3 categories) >>>", bot = "hhhhhhhhhhhhhhhh"))
- expect_equal(long2, list(top = "dddddddddddddddd, aaaaaaaaa...", mid = "cccccccccccccccccccc", bot = "bbbbbbbbbbbbbbbbb, hhhhhhhh..."))
- expect_equal(nchar(long2$top), 30)
- expect_equal(nchar(long2$bot), 30)
-# Tests for two-way statistical tests
-# Duplicate mtcars rows to avoid chisq.test warnings
-mtcars3 <- rbind(mtcars, mtcars, mtcars)
-tab <- table(mtcars3$am, mtcars3$cyl)
-ttab <- tabyl(mtcars3, am, cyl)
-ow_tab <- tabyl(mtcars3, am)
-test_that("one-way tabyl is rejected by chisq.test and fisher.test", {
- expect_error(chisq.test(ow_tab))
- expect_error(fisher.test(ow_tab))
-test_that("janitor::chisq.test on a table is correct", {
-  # For a base `table` input the janitor version is expected to give exactly
-  # the result of stats::chisq.test().
-  expect_equal(
-    janitor::chisq.test(tab),
-    stats::chisq.test(tab)
-  )
-test_that("janitor::chisq.test on a matrix is correct", {
-  # Six counts laid out as a 2 x 3 matrix (filled column by column).
-  mat <- matrix(c(151, 434, 345, 221, 145, 167), ncol = 3)
-  expected <- stats::chisq.test(mat)
-  actual <- janitor::chisq.test(mat)
-  expect_equal(actual, expected)
-test_that("janitor::chisq.test on two factors is correct", {
-  # Two-vector form: both implementations receive the same pair of columns.
-  expect_equal(
-    janitor::chisq.test(mtcars3$am, mtcars3$cyl),
-    stats::chisq.test(mtcars3$am, mtcars3$cyl)
-  )
-test_that("janitor::chisq.test with a numeric vector and p is correct", {
-  # Fixed counts, in the same 200-1000 range the random draws used to cover,
-  # keep the test reproducible: a failure can be re-run on identical data.
-  v1 <- c(212, 305, 398, 451, 527, 613, 702, 788, 874, 963)
-  v2 <- c(941, 856, 760, 689, 590, 504, 433, 347, 266, 208)
-  # `p` is a vector of probabilities, so the second vector is normalised.
-  res <- stats::chisq.test(v1, p = v2 / sum(v2))
-  jres <- janitor::chisq.test(v1, p = v2 / sum(v2))
-  expect_equal(jres, res)
-test_that("janitor::fisher.test on a table is correct", {
-  # For a base `table` input the janitor version is expected to give exactly
-  # the result of stats::fisher.test().
-  expect_equal(
-    janitor::fisher.test(tab),
-    stats::fisher.test(tab)
-  )
-test_that("janitor::fisher.test on a matrix is correct", {
-  # Six counts laid out as a 2 x 3 matrix (filled column by column).
-  mat <- matrix(c(151, 434, 345, 221, 145, 167), ncol = 3)
-  expected <- stats::fisher.test(mat)
-  actual <- janitor::fisher.test(mat)
-  expect_equal(actual, expected)
-test_that("janitor::fisher.test on two vectors is correct", {
-  # Two-vector form: both implementations receive the same pair of columns.
-  expect_equal(
-    janitor::fisher.test(mtcars3$am, mtcars3$cyl),
-    stats::fisher.test(mtcars3$am, mtcars3$cyl)
-  )
-test_that("janitor::chisq.test on a two-way tabyl is identical to stats::chisq.test", {
- # First run the test on a tabyl, with tabyl_results switched off.
- tab <- tabyl(mtcars3, am, cyl)
- tres <- chisq.test(tab, tabyl_results = FALSE)
- # NOTE(review): `tab` is reassigned rather than given a new name - presumably
- # so both results record the same input name and compare equal; confirm
- # before renaming either object.
- tab <- table(mtcars3$am, mtcars3$cyl)
- res <- chisq.test(tab)
- expect_equal(tres, res)
-test_that("janitor::fisher.test on a two-way tabyl is identical to stats::fisher.test", {
- # First run the test on a tabyl built from the same two columns.
- tab <- tabyl(mtcars3, am, cyl)
- tres <- fisher.test(tab)
- # NOTE(review): `tab` is reassigned rather than given a new name - presumably
- # so both results record the same input name and compare equal; confirm
- # before renaming either object.
- tab <- table(mtcars3$am, mtcars3$cyl)
- res <- fisher.test(tab)
- expect_equal(tres, res)
-test_that("janitor::chisq.test returns tabyl tables", {
-  tres <- chisq.test(ttab, tabyl_results = TRUE)
-  # Every tabular component of the result must carry the tabyl class.
-  for (component in c("observed", "expected", "residuals", "stdres")) {
-    expect_s3_class(tres[[component]], "tabyl")
-  }
-test_that("returned tabyls have correct names and attributes", {
-  tres <- chisq.test(ttab, tabyl_results = TRUE)
-  # The same three properties are verified on each tabular component:
-  # column names, the values of the first column, and the var_names attribute.
-  for (component in c("observed", "expected", "residuals", "stdres")) {
-    result_tbl <- tres[[component]]
-    expect_named(result_tbl, c("am", "4", "6", "8"))
-    expect_equal(result_tbl[[1]], c("0", "1"))
-    expect_equal(
-      attr(result_tbl, "var_names"),
-      list(row = "am", col = "cyl")
-    )
-  }
-test_that("totals are excluded from the statistical tests, #385", {
- # Chi-Square
- cx <- chisq.test(ttab)
- cx_totals <- suppressWarnings(chisq.test(adorn_totals(ttab, "both")))
- cx_totals$data.name <- "ttab" # otherwise the test shows a mismatch, as the inputs had different names
- expect_equal(
- cx,
- cx_totals
- )
- expect_warning(
- chisq.test(ttab %>% adorn_totals()),
- "detected a totals row"
- )
- # Fisher
- fisher <- fisher.test(ttab)
- fisher_totals <- suppressWarnings(fisher.test(adorn_totals(ttab, "both")))
- fisher_totals$data.name <- "ttab" # otherwise the test shows a mismatch, as the inputs had different names
- expect_equal(
- fisher,
- fisher_totals
- )
- expect_warning(
- fisher.test(ttab %>% adorn_totals()),
- "detected a totals row"
- )
-# Tests tabyl class functions
-a <- mtcars %>%
- tabyl(cyl, carb)
-b <- mtcars %>%
- dplyr::count(cyl, carb) %>%
- tidyr::pivot_wider(
- names_from = carb,
- values_from = n,
- values_fill = 0,
- names_sort = TRUE
- ) %>%
- as.data.frame() # for comparison purposes, remove the tbl_df aspect
-test_that("as_tabyl works on result of a non-janitor count/pivot_wider", {
- expect_equal(
- as_tabyl(a),
- as_tabyl(b, 2, "cyl", "carb")
- )
-test_that("as_tabyl sets attributes correctly", {
- d <- as_tabyl(a)
- expect_equal(class(d), class(a))
- expect_equal(attr(d, "core"), untabyl(a))
- expect_equal(attr(d, "tabyl_type"), "two_way")
-test_that("untabyl puts back to original form", {
-  # Converting to a tabyl and straight back should leave mtcars unchanged.
-  round_trip <- untabyl(as_tabyl(mtcars))
-  expect_equal(mtcars, round_trip)
-test_that("untabyl warns if called on non-tabyl", {
- expect_warning(
- untabyl(mtcars),
- "untabyl\\(\\) called on a non-tabyl"
- )
-test_that("untabyl automatically invokes purrr::map when called on a 3-way tabyl", {
- three <- tabyl(mtcars, cyl, am, gear)
- expect_equal(
- untabyl(three), # vanilla call
- purrr::map(three, untabyl)
- )
-test_that("as_tabyl is okay with non-numeric columns", {
- e <- b %>%
- dplyr::mutate(extra = "val")
- expect_equal(attr(as_tabyl(e), "core"), e) # implied success of as_tabyl
-test_that("as_tabyl fails if no numeric columns in 2:n", {
- bad <- data.frame(
- a = 1:2,
- b = c("x", "y")
- )
- expect_error(as_tabyl(bad), "at least one one of columns 2:n must be of class numeric")
-test_that("bad inputs are caught", {
- expect_error(as_tabyl(mtcars, 3),
- "axes must be either 1 or 2",
- fixed = TRUE
- )
- expect_error(as_tabyl(1:10),
- "input must be a data.frame",
- fixed = TRUE
- )
- # don't pass names to a 1-way tabyl
- expect_error(
- as_tabyl(mtcars, axes = 1, row_var_name = "foo"),
- "variable names are only meaningful for two-way tabyls"
- )
-test_that("adorn_totals and adorn_percentages reset the tabyl's core to reflect sorting, #407", {
- unsorted <- mtcars %>% tabyl(am, cyl)
- sorted <- dplyr::arrange(unsorted, desc(`4`))
- expect_equal(
- sorted %>%
- adorn_totals() %>%
- attr(., "core"),
- sorted %>%
- untabyl()
- )
- expect_equal(
- sorted %>%
- adorn_percentages() %>%
- attr(., "core"),
- sorted %>%
- untabyl()
- )
- # both:
- expect_equal(
- sorted %>%
- adorn_totals() %>%
- adorn_percentages() %>%
- attr(., "core"),
- sorted %>%
- untabyl()
- )
- # Ns with "Total" row sorted to top - the Total N should be up there too:
- expect_equal(
- sorted %>%
- adorn_totals() %>%
- adorn_percentages("col") %>%
- dplyr::arrange(desc(`4`)) %>%
- adorn_ns() %>%
- dplyr::pull(`4`) %>%
- dplyr::first(),
- "1.0000000 (11)"
- )
-# Tests for tabyl()
-cyl_tbl <- tabyl(mtcars$cyl)
-test_that("counts are accurate", {
- expect_equal(cyl_tbl$`mtcars$cyl`, c(4, 6, 8))
- expect_equal(cyl_tbl$n, c(11, 7, 14))
-test_that("percentages are accurate", {
- expect_equal(cyl_tbl$percent, c(11 / 32, 7 / 32, 14 / 32))
-# Character input, with and without NA
-test_df <- data.frame(grp = c("a", "b", "b", "c"), stringsAsFactors = FALSE)
-test_df_na <- data.frame(grp = c("a", "b", "b", "c", NA), stringsAsFactors = FALSE)
-test_res <- tabyl(test_df$grp)
-test_res_na <- tabyl(test_df_na$grp)
-test_that("names are right", {
- expect_equal(names(cyl_tbl), c("mtcars$cyl", "n", "percent"))
- expect_equal(names(test_res_na), c("test_df_na$grp", "n", "percent", "valid_percent"))
-test_that("named vectors are handled properly", { # issue 144
- x <- c(a = "x", b = "y", c = "z")
- expect_equal(names(tabyl(x))[1], "x")
-test_that("NAs handled correctly", {
- expect_equal(test_res_na$percent, c(0.2, 0.4, 0.2, 0.2))
- expect_equal(test_res_na$valid_percent, c(0.25, 0.5, 0.25, NA))
-test_that("show_na = FALSE parameter works, incl. with piped input", {
-  # Start from the NA-free result and relabel its first column - in both the
-  # tabyl and its "core" attribute - to match what each call below produces.
-  expected <- test_res
-  names(expected)[1] <- "test_df_na$grp"
-  names(attr(expected, "core"))[1] <- "test_df_na$grp"
-  expect_equal(
-    expected,
-    tabyl(test_df_na$grp, show_na = FALSE)
-  )
-  # for this next instance, col name changes: piped input uses the bare name
-  names(attr(expected, "core"))[1] <- "grp"
-  names(expected)[1] <- "grp"
-  expect_equal(
-    expected,
-    test_df_na %>% tabyl(grp, show_na = FALSE)
-  )
-test_that("ordering of result by factor levels is preserved for factors", {
- expect_equal(tabyl(factor(c("x", "y", "z"), levels = c("y", "z", "x")))[[1]], factor(c("y", "z", "x"), levels = c("y", "z", "x")))
-# missing factor levels shown, with and without NA
-fac <- iris[["Species"]][70:80] # to get versicolor, not the first alphabetically
-fac_na <- fac
-fac_na[1:2] <- NA
-test_that("missing factor levels are displayed without NA values", {
- expect_equal(tabyl(fac)[[1]], factor(c("setosa", "versicolor", "virginica"), levels = c("setosa", "versicolor", "virginica")))
- expect_equal(tabyl(fac)[[2]], c(0, 11, 0))
- expect_equal(tabyl(fac)[[3]], c(0, 1, 0))
-test_that("missing factor levels are displayed with NA values", {
- expect_equal(tabyl(fac_na)[[1]], factor(c("setosa", "versicolor", "virginica", NA), levels = c("setosa", "versicolor", "virginica")))
- expect_equal(tabyl(fac_na)[[2]], c(0, 9, 0, 2))
- expect_equal(tabyl(fac_na)[[3]], c(0, 9 / 11, 0, 2 / 11))
- expect_equal(tabyl(fac_na)[[4]], c(0, 1, 0, NA))
-# piping
-test_that("piping in a data.frame works", {
- x <- tabyl(mtcars$cyl)
- names(x)[1] <- "cyl"
- names(attr(x, "core"))[1] <- "cyl"
- expect_equal(
- x,
- mtcars %>% tabyl(cyl)
- )
-test_that("column1 stays its original data type per #168, in both resulting tabyl and core", {
- # test character, logical, numeric, factor X both values for show_missing_levels; confirm class in core and in main result
- # do those 8 tests in a loop?
- loop_df <- data.frame(
- a = c(TRUE, FALSE, TRUE),
- b = c("x", "y", "y"),
- c = c(1, 1, 2), stringsAsFactors = FALSE
- )
- for (i in c("logical", "numeric", "character")) {
- for (j in c(TRUE, FALSE)) {
- loop_df_temp <- loop_df
- class(loop_df_temp$a) <- i
- loop_tab <- loop_df_temp %>% tabyl(a, b, c, show_missing_levels = j)
- expect_equal(class(loop_tab[[1]]$a), class(loop_df_temp$a))
- expect_equal(class(attr(loop_tab[[1]], "core")$a), class(loop_df_temp$a)) # check core class
- }
- }
- loop_df$a <- factor(c("hi", "lo", "hi"))
- for (j in c(TRUE, FALSE)) {
- loop_df_temp <- loop_df
- loop_tab <- loop_df_temp %>% tabyl(a, b, c, show_missing_levels = j)
- expect_equal(class(loop_tab[[1]]$a), class(loop_df_temp$a))
- expect_equal(levels(loop_tab[[1]]$a), levels(loop_df_temp$a))
- expect_equal(class(attr(loop_tab[[1]], "core")$a), class(loop_df_temp$a)) # check core class and levels
- expect_equal(levels(attr(loop_tab[[1]], "core")$a), levels(loop_df_temp$a))
- }
-# bad inputs
-test_that("failure occurs when passed unsupported types", {
- expect_error(tabyl(matrix(1:10, nrow = 5)), "input must be a vector of type logical, numeric, character, list, or factor")
- expect_error(tabyl(complex(10)), "input must be a vector of type logical, numeric, character, list, or factor")
-test_that("bad input variable name is preserved", {
- expect_equal(
- mtcars %>% dplyr::mutate(`bad name` = cyl) %>% tabyl(`bad name`) %>% names() %>% .[[1]],
- "bad name"
- )
- k <- mtcars %>% dplyr::mutate(`bad name` = cyl)
- expect_equal(
- tabyl(k$`bad name`) %>% names() %>% .[[1]],
- "k$`bad name`"
- )
-test_that("input variable names 'percent' and 'n' are handled", {
- a <- mtcars %>% tabyl(mpg)
- expect_equal(
- a %>% tabyl(percent),
- as_tabyl(
- data.frame(
- percent = c(1 / 32, 2 / 32),
- n = c(18, 7),
- percent_percent = c(18 / 25, 7 / 25)
- ),
- 1
- )
- )
- expect_equal(
- a %>% tabyl(n),
- as_tabyl(
- data.frame(
- n = 1:2,
- n_n = c(18, 7),
- percent = c(18 / 25, 7 / 25)
- ),
- 1
- )
- )
-test_that("bizarre combination of %>%, quotes, and spaces in names is handled", {
- dat <- data.frame(
- `The candidate(s) applied directly to my school` = c("a", "b", "a", "b"),
- check.names = FALSE,
- stringsAsFactors = FALSE
- )
- expect_equal(
- tabyl(dat$`The candidate(s) applied directly to my school` %>% gsub("hi", "there", .)) %>%
- names() %>%
- .[1],
- "dat$`The candidate(s) applied directly to my school` %>% gsub(\"hi\", \"there\", .)"
- )
-test_that("grouped data.frame inputs are handled (#125)", {
- expect_equal(
- mtcars %>% dplyr::group_by(cyl) %>% tabyl(carb, gear),
- mtcars %>% tabyl(carb, gear)
- )
-test_that("if called on non-existent vector, returns useful error message", {
- expect_error(tabyl(mtcars$moose), "object mtcars\\$moose not found")
- expect_error(tabyl(moose), "object 'moose' not found")
- expect_error(mtcars %>% tabyl(moose))
-test_that("if called on data.frame with no or irregular columns specified, returns informative error message", {
- expect_error(tabyl(mtcars), "if calling on a data.frame, specify unquoted column names(s) to tabulate. Did you mean to call tabyl() on a vector?",
- fixed = TRUE
- )
- expect_error(tabyl(mtcars, var2 = am),
- "please specify var1 OR var1 & var2 OR var1 & var2 & var3",
- fixed = TRUE
- )
-test_that("fails if called on a non-data.frame list", { # it's not meant to do this and result will likely be garbage, so fail
- L <- list(a = 1, b = "rstats")
- expect_error(tabyl(L),
- "tabyl() is meant to be called on vectors and data.frames; convert non-data.frame lists to one of these types",
- fixed = TRUE
- )
-# showing missing factor levels
-test_that("show_missing_levels parameter works", {
- z <- structure(
- list(
- a = structure(1, .Label = c("hi", "lo"), class = "factor"),
- b = structure(2, .Label = c("big", "small"), class = "factor"),
- new = structure(1, .Label = c("lvl1", "lvl2"), class = "factor")
- ),
- row.names = c(NA, -1L), class = c("tbl_df", "tbl", "data.frame"),
- .Names = c("a", "b", "new")
- )
- expect_equal(
- z %>% tabyl(a, b, new, show_missing_levels = TRUE),
- list(lvl1 = data.frame(
- a = c("hi", "lo"),
- big = c(0, 0),
- small = c(1, 0),
- stringsAsFactors = TRUE
- ) %>% as_tabyl(2, "a", "b"))
- )
- expect_equal(
- z %>% tabyl(a, b, new, show_missing_levels = FALSE) %>% .[[1]],
- data.frame(
- a = factor("hi", levels = c("hi", "lo")),
- small = c(1)
- ) %>% as_tabyl(2, "a", "b")
- )
- # Works with numerics
- expect_equal(
- mtcars %>% tabyl(cyl, am),
- data.frame(
- cyl = c(4, 6, 8),
- `0` = c(3, 4, 12),
- `1` = c(8, 3, 2),
- check.names = FALSE
- ) %>% as_tabyl(2, "cyl", "am")
- )
-# NA handling - position and removal
-# Putting this outside the following test block for later re-use
-x <- data.frame(
- a = c(1, 2, 2, 2, 1, 1, 1, NA, NA, 1),
- b = c(rep("up", 4), rep("down", 4), NA, NA),
- c = 10,
- d = c(NA, 10:2),
- stringsAsFactors = FALSE
-test_that("NA levels get moved to the last column in the data.frame, are suppressed properly", {
- y <- tabyl(x, a, b) %>%
- untabyl()
- expect_equal(
- y,
- data.frame(
- a = c(1, 2, NA),
- down = c(3, 0, 1),
- up = c(1, 3, 0),
- NA_ = c(1, 0, 1)
- )
- )
- expect_equal(
- tabyl(x, a, b, show_na = FALSE) %>%
- untabyl(),
- data.frame(
- a = c(1, 2),
- down = c(3, 0),
- up = c(1, 3)
- )
- )
- # one-way suppression
- expect_equal(
- tabyl(x$a, show_na = FALSE) %>%
- untabyl(),
- data.frame(
- `x$a` = 1:2,
- n = c(5, 3),
- percent = c(0.625, 0.375),
- check.names = FALSE
- )
- )
- # NA level is shown in 3 way split
- y <- x %>% tabyl(c, a, b, show_missing_levels = FALSE)
- expect_equal(length(y), 3)
- expect_equal(names(y), c("down", "up", "NA_"))
- expect_equal(
- y[["NA_"]], # column c remains numeric
- x %>%
- dplyr::filter(is.na(b)) %>%
- tabyl(c, a)
- )
- y_with_missing <- x %>% tabyl(c, a, b, show_missing_levels = TRUE)
- expect_equal(length(y_with_missing), 3)
- expect_equal(names(y_with_missing), c("down", "up", "NA_"))
- expect_equal(
- y_with_missing[["NA_"]] %>% untabyl(), # column c remains numeric
- data.frame(c = 10, `1` = 1, `2` = 0, NA_ = 1, check.names = FALSE)
- )
- # If no NA in 3rd variable, it doesn't appear in split list
- expect_equal(length(dplyr::starwars %>%
- dplyr::filter(species == "Human") %>%
- tabyl(eye_color, skin_color, gender, show_missing_levels = TRUE)), 2)
- # If there is NA, it does appear in split list
- expect_equal(length(dplyr::starwars %>%
- tabyl(eye_color, skin_color, gender, show_missing_levels = TRUE)), 3)
- expect_equal(length(dplyr::starwars %>%
- tabyl(eye_color, skin_color, gender, show_missing_levels = FALSE)), 3)
- # NA level in the list gets suppressed if show_na = FALSE. Should have one less level if NA is suppressed.
- expect_equal(length(dplyr::starwars %>%
- tabyl(eye_color, skin_color, gender, show_na = TRUE)), 3)
- expect_equal(length(dplyr::starwars %>%
- tabyl(eye_color, skin_color, gender, show_na = FALSE)), 2)
-test_that("tabyl fill 0s with show_missing_levels = FALSE", {
- res <- x %>% tabyl(a, b, show_missing_levels = FALSE)
- got <- data.frame(a = c(1, 2, NA), down = c(3L, 0L, 1L), up = c(1L, 3L, 0L), NA_ = c(1L, 0L, 1L)) %>%
- structure(
- class = c("tabyl", "data.frame"),
- core = data.frame(a = c(1, 2, NA), down = c(3L, 0L, 1L), up = c(1L, 3L, 0L), NA_ = c(1L, 0L, 1L)),
- tabyl_type = "two_way",
- var_names = list(row = "a", col = "b")
- )
- expect_equal(res, got)
-test_that("zero-row and fully-NA inputs are handled", {
-  # Every message expectation below checks this text, so an unrelated message
-  # cannot make the test pass by accident.
-  zero_row_msg <- "No records to count so returning a zero-row tabyl"
-
-  # Empty vector: a zero-row one-way tabyl with the usual columns.
-  zero_vec <- character(0)
-  expect_equal(nrow(tabyl(zero_vec)), 0)
-  expect_equal(names(tabyl(zero_vec)), c("zero_vec", "n", "percent"))
-
-  # Empty data.frame: a zero-row two-way tabyl holding only the row variable.
-  zero_df <- data.frame(a = character(0), b = character(0))
-  expect_message(
-    expect_equal(nrow(tabyl(zero_df, a, b)), 0),
-    zero_row_msg
-  )
-  expect_message(
-    expect_equal(names(tabyl(zero_df, a, b)), "a"),
-    zero_row_msg
-  )
-
-  # All-NA data with show_na = FALSE leaves nothing to count either.
-  all_na_df <- data.frame(a = c(NA, NA), b = c(NA_character_, NA_character_))
-  expect_message(
-    expect_equal(tabyl(all_na_df, a, b, show_na = FALSE) %>% nrow(), 0),
-    zero_row_msg
-  )
-  expect_message(
-    expect_equal(tabyl(all_na_df, a, b, show_na = FALSE) %>% names(), "a"),
-    zero_row_msg
-  )
-test_that("print.tabyl prints without row numbers", {
- expect_equal(
- mtcars %>% tabyl(am, cyl) %>% capture.output(),
- c(" am 4 6 8", " 0 3 4 12", " 1 8 3 2")
- )
-test_that("the dplyr warning suggesting forcats::fct_explicit_na that is generated by a tabyl of a factor with NA values is caught ", {
- # leaving this in as I'd want to know if it ever gets loud again, but the warning seems to be gone in
- # dplyr 1.0.0 and I have removed the withCallingHandlers({}) code in tabyl() that this was testing
- expect_silent(
- tabyl(factor(c("a", "b", NA)))
- )
- xx <- data.frame(
- a = factor(c("a", "b", NA)),
- b = 1:3
- )
- expect_silent(xx %>%
- tabyl(a, b))
-test_that("3-way tabyl with 3rd var factor is listed in right order, #250", {
- z <- mtcars
- z$cyl <- factor(z$cyl, levels = c(4, 8, 6))
- expect_equal(names(tabyl(z, am, gear, cyl)), c("4", "8", "6"))
- z$cyl[32] <- NA
- expect_equal(names(tabyl(z, am, gear, cyl)), c("4", "8", "6", "NA_"))
- expect_equal(names(tabyl(z, am, gear, cyl, show_na = FALSE)), c("4", "8", "6"))
- z <- z %>% dplyr::filter(!cyl %in% "4")
- expect_equal(names(tabyl(z, am, gear, cyl)), c("8", "6", "NA_"))
-test_that("tabyl works with label attributes (#394)", {
- mt_label <- mtcars
- attr(mt_label$cyl, "label") <- "Number of cyl"
- tab <- tabyl(mt_label, cyl)
- expect_named(tab, c("Number of cyl", "n", "percent"))
- tab2 <- tabyl(mt_label, cyl, am)
- expect_named(tab2, c("Number of cyl", "0", "1"))
- tab3 <- tabyl(mt_label, cyl, am, vs)
- expect_equal(names(tab3[[1]])[1], "Number of cyl")
-test_that("tabyl works with ordered 1st variable, #386", {
- mt_ordered <- mtcars
- mt_ordered$cyl <- ordered(mt_ordered$cyl, levels = c("4", "8", "6"))
- ordered_3way <- mt_ordered %>%
- tabyl(cyl, gear, am)
- expect_equal(class(ordered_3way[[1]]$cyl), c("ordered", "factor")) # 1st col in resulting tabyl
- expect_equal(class(attr(ordered_3way[[1]], "core")$cyl), c("ordered", "factor")) # 1st col in tabyl core
-test_that("factor ordering of columns is correct in 2-way tabyl", {
- two_factors <- data.frame(
- x = factor(c("big", "small", "medium", "small"),
- levels = c("small", "medium", "big")
- ),
- y = factor(c("hi", "hi", "hi", "lo"),
- levels = c("lo", "hi")
- )
- )
- expect_equal(
- two_factors %>%
- tabyl(x, y) %>%
- names(),
- c("x", "lo", "hi")
- )
-test_that("empty strings converted to emptystring_", {
-  mt_empty <- mtcars
-  # Assigning character values turns the whole cyl column into character; it
-  # now holds one empty string and one NA alongside the original values.
-  mt_empty$cyl[1:2] <- c("", NA_character_)
-  # The empty string and the NA each get their own placeholder column name.
-  expect_equal(
-    mt_empty %>%
-      tabyl(am, cyl) %>%
-      names(),
-    c("am", "4", "6", "8", "emptystring_", "NA_")
-  )
-test_that("3way tabyls with factors in cols 1-2 are arranged correctly, #379", {
- dat_3wayfactors <- data.frame(
- gender = c("f", "m", "m", "f", "m"),
- age_group = factor(
- c("18-35", "46-55", "46-55", "36-45", ">55"),
- levels = c("18-35", "36-45", "46-55", ">55")
- ),
- bmi_group = factor(
- c("18.5 - 25", "25 - 30", "18.5 - 25", ">30", "<18.5"),
- levels = c("<18.5", "18.5 - 25", "25 - 30", ">30")
- ),
- stringsAsFactors = TRUE
- )
- tabyl_3wf <- dat_3wayfactors %>%
- tabyl(bmi_group, age_group, gender, show_missing_levels = FALSE)
- expect_equal(names(tabyl_3wf$m), c("bmi_group", "46-55", ">55"))
- expect_equal(
- tabyl_3wf$f[[1]],
- factor(
- c("18.5 - 25", ">30"),
- levels = c("<18.5", "18.5 - 25", "25 - 30", ">30")
- )
- )
-test_that("tabyl errors informatively called like tabyl(mtcars$cyl, mtcars$gear), #377", {
- expect_error(
- tabyl(mtcars$cyl, mtcars$am),
- regexp = "Did you try to call tabyl on two vectors"
- )
- has_logicals <- data.frame(x = 1:2, y = c(TRUE, FALSE))
- expect_error(
- tabyl(has_logicals$x, has_logicals$y),
- regexp = "Did you try to call tabyl on two vectors"
- )
- expect_type(
- has_logicals %>%
- tabyl(x, y),
- "list"
- )
-test_that("2-way tabyl with numeric column names is sorted numerically", {
-  # var2 holds one NA followed by 10 down to 1.
-  df <- data.frame(var1 = 1:11, var2 = c(NA, 10:1))
-  crosstab <- df %>% tabyl(var1, var2)
-  expect_equal(
-    colnames(crosstab),
-    c("var1", as.character(1:10), "NA_")
-  )
-test_that("3-way tabyl with numeric names is sorted numerically", {
-  hp_split <- mtcars %>% tabyl(gear, cyl, hp)
-  expect_equal(
-    names(hp_split),
-    as.character(sort(unique(mtcars$hp)))
-  )
-  # Check putting NA last - data.frame "x" is created way above
-  d_split <- x %>% tabyl(a, c, d)
-  expect_equal(
-    names(d_split),
-    c(as.character(2:10), "NA_")
-  )
-fac <- factor(c("a", "b", "c", "d", "e", "f", "f"), levels = rev(letters[1:6]))
-fac_odd_lvls <- factor(fac, levels = rev(letters[1:5]))
-# more tests - group names and ordering - are in test-get-level-groups.R
-test_that("top_levels values are correct", {
-  # default n = 2, num_levels = 6
-  six_default <- top_levels(fac)
-  expect_equal(six_default[[3]], c(3 / 7, 2 / 7, 2 / 7))
-  expect_equal(six_default[[2]], c(3, 2, 2))
-
-  # n = 3, num_levels = 6
-  six_n3 <- top_levels(fac, 3)
-  expect_equal(six_n3[[3]], c(4 / 7, 3 / 7))
-  expect_equal(six_n3[[2]], c(4, 3))
-
-  # default n = 2, num_levels = 5
-  five_default <- top_levels(fac_odd_lvls)
-  expect_equal(five_default[[2]], c(2, 1, 2))
-  expect_equal(five_default[[3]], c(0.4, 0.2, 0.4))
-
-  # n = 1, num_levels = 5
-  five_n1 <- top_levels(fac_odd_lvls, 1)
-  expect_equal(five_n1[[2]], c(1, 3, 1))
-  expect_equal(five_n1[[3]], c(0.2, 0.6, 0.2))
-test_that("top_levels missing levels are represented", {
-  # Keep only a, b, c while retaining all five levels: "d" and "e" are unused.
-  x <- as.factor(letters[1:5])[1:3]
-  # Build the expected factor with factor() rather than the legacy `.Label`
-  # alias of structure(); the levels keep the group order top, middle, bottom.
-  group_labels <- c("a, b", "c", "d, e")
-  expect_equal(
-    top_levels(x)[[1]],
-    factor(group_labels, levels = group_labels)
-  )
-  # The bottom group has no observations and is still reported, with count 0.
-  expect_equal(
-    top_levels(x)[[2]],
-    c(2, 1, 0)
-  )
-test_that("top_levels NA results are treated appropriately", {
-  # Blank out the last observation so the input contains a single NA.
-  fac_na <- fac
-  fac_na[7] <- NA
-  # By default the NA is left out of the result.
-  expect_equal(top_levels(fac_na)[[2]], rep(2, 3))
-  # With show_na = TRUE an extra row appears, checked across columns 2 to 4.
-  with_na_row <- top_levels(fac_na, show_na = TRUE)
-  expect_equal(with_na_row[[2]], c(2, 2, 2, 1))
-  expect_equal(with_na_row[[3]], c(2 / 7, 2 / 7, 2 / 7, 1 / 7))
-  expect_equal(with_na_row[[4]], c(1 / 3, 1 / 3, 1 / 3, NA))
-test_that("top_levels default n parameter works", {
- expect_equal(top_levels(fac), top_levels(fac, 2))
-test_that("top_levels missing levels are treated appropriately", {
- fac_missing_lvl <- fac
- fac_missing_lvl[2] <- NA
- expect_equal(top_levels(fac_missing_lvl)[[2]], c(3, 2, 1))
-test_that("top_levels bad type inputs are handled", {
-  # The expected message must be an argument of expect_error(). Placed inside
-  # the top_levels() call it is taken as `n`, and the message is never checked.
-  bad_type_msg <- "factor_vec is not of type 'factor'"
-  expect_error(top_levels(c(0, 1)), bad_type_msg, fixed = TRUE)
-  expect_error(top_levels(c("hi", "lo")), bad_type_msg, fixed = TRUE)
-  expect_error(top_levels(mtcars), bad_type_msg, fixed = TRUE)
-test_that("top_levels bad n value is handled", {
- expect_error(top_levels(fac, 4))
- expect_error(top_levels(fac_odd_lvls, 3))
- expect_error(top_levels(fac, 0))
- expect_error(top_levels(factor(c("a", "b"))), "input factor variable must have at least 3 levels")
-test_that("top_levels correct variable name assigned to first column of result", {
- expect_equal(names(top_levels(fac))[1], "fac")
-title: "tabyls: a tidy, fully-featured approach to counting things"
-date: '`r Sys.Date()`'
- rmarkdown::github_document
-vignette: >
- %\VignetteIndexEntry{tabyls}
- %\VignetteEngine{knitr::rmarkdown}
- %\VignetteEncoding{UTF-8}
-```{r chunk_options, include = FALSE}
-knitr::opts_chunk$set(collapse = TRUE, comment = "#>")
-## Motivation: why tabyl?
-Analysts do a lot of counting. Indeed, it's been said that "data science is mostly counting things." But the base R function for counting, `table()`, leaves much to be desired:
-- It doesn't accept data.frame inputs (and thus doesn't play nicely with the `%>%` pipe)
-- It doesn't output data.frames
-- Its results are hard to format. Compare the look and formatting choices of an R table to a Microsoft Excel PivotTable or even the table formatting provided by SPSS.
-`tabyl()` is an approach to tabulating variables that addresses these shortcomings. It's part of the janitor package because counting is such a fundamental part of data cleaning and exploration.
-`tabyl()` is tidyverse-aligned and is primarily built upon the dplyr and tidyr packages.
-## How it works
-On its surface, `tabyl()` produces frequency tables using 1, 2, or 3 variables. Under the hood, `tabyl()` also attaches a copy of these counts as an attribute of the resulting data.frame.
-The result looks like a basic data.frame of counts, but because it's also a `tabyl` containing this metadata, you can use `adorn_` functions to add additional information and pretty formatting.
-The `adorn_` functions are built to work on `tabyls`, but have been adapted to work with similar, non-tabyl data.frames that need formatting.
-# Examples
-This vignette demonstrates `tabyl` in the context of studying humans in the `starwars` dataset from dplyr:
-```{r clean_starwars, warning = FALSE, message = FALSE}
-humans <- starwars %>%
- filter(species == "Human")
-## One-way tabyl
-Tabulating a single variable is the simplest kind of tabyl:
-```{r one_way, message=FALSE}
-t1 <- humans %>%
- tabyl(eye_color)
-When `NA` values are present, `tabyl()` also displays "valid" percentages, i.e., with missing values removed from the denominator. And while `tabyl()` is built to take a data.frame and column names, you can also produce a one-way tabyl by calling it directly on a vector:
-```{r one_way_vector}
-x <- c("big", "big", "small", "small", "small", NA)
-Most `adorn_` helper functions are built for 2-way tabyls, but those that make sense for a 1-way tabyl do work:
-```{r one_way_adorns}
-t1 %>%
- adorn_totals("row") %>%
- adorn_pct_formatting()
-## Two-way tabyl
-This is often called a "crosstab" or "contingency" table. Calling `tabyl` on two columns of a data.frame produces the same result as the common combination of `dplyr::count()`, followed by `tidyr::pivot_wider()` to wide form:
-```{r two_way}
-t2 <- humans %>%
- tabyl(gender, eye_color)
-Since it's a `tabyl`, we can enhance it with `adorn_` helper functions. For instance:
-```{r two_way_adorns}
-t2 %>%
- adorn_percentages("row") %>%
- adorn_pct_formatting(digits = 2) %>%
- adorn_ns()
-Adornments have options to control axes, rounding, and other relevant formatting choices (more on that below).
-## Three-way tabyl
-Just as `table()` accepts three variables, so does `tabyl()`, producing a list of tabyls:
-```{r three_Way}
-t3 <- humans %>%
- tabyl(eye_color, skin_color, gender)
-# the result is a tabyl of eye color x skin color, split into a list by gender
-If the `adorn_` helper functions are called on a list of data.frames - like the output of a three-way `tabyl` call - they will call `purrr::map()` to apply themselves to each data.frame in the list:
-```{r three_way_adorns, warning = FALSE, message = FALSE}
-humans %>%
- tabyl(eye_color, skin_color, gender, show_missing_levels = FALSE) %>%
- adorn_totals("row") %>%
- adorn_percentages("all") %>%
- adorn_pct_formatting(digits = 1) %>%
- adorn_ns() %>%
- adorn_title()
-This automatic mapping supports interactive data analysis that switches between combinations of 2 and 3 variables. That way, if a user starts with `humans %>% tabyl(eye_color, skin_color)`, adds some `adorn_` calls, then decides to split the tabulation by gender and modifies their first line to `humans %>% tabyl(eye_color, skin_color, gender)`, they don't have to rewrite the subsequent adornment calls to use `map()`.
-However, if it feels more natural to call these with `map()` or `lapply()`, that is still supported. For instance, `t3 %>% lapply(adorn_percentages)` would produce the same result as `t3 %>% adorn_percentages`.
-### Other features of tabyls
-+ When called on a factor, `tabyl` will show missing levels (levels not present in the data) in the result
- + This can be suppressed if not desired
-+ `NA` values can be displayed or suppressed
-+ `tabyls` print without displaying row numbers
-You can call `chisq.test()` and `fisher.test()` on a two-way tabyl to perform those statistical tests, just like on a base R `table()` object.
-## The `adorn_*` functions
-These modular functions build on a `tabyl` to approximate the functionality of a PivotTable in Microsoft Excel. They print elegant results for interactive analysis or for sharing in a report, e.g., with `knitr::kable()`. For example:
-humans %>%
- tabyl(gender, eye_color) %>%
- adorn_totals(c("row", "col")) %>%
- adorn_percentages("row") %>%
- adorn_pct_formatting(rounding = "half up", digits = 0) %>%
- adorn_ns() %>%
- adorn_title("combined") %>%
- knitr::kable()
-### The adorn functions are:
-+ **`adorn_totals()`**: Add totals row, column, or both.
-+ **`adorn_percentages()`**: Calculate percentages along either axis or over the entire tabyl
-+ **`adorn_pct_formatting()`**: Format percentage columns, controlling the number of digits to display and whether to append the `%` symbol
-+ **`adorn_rounding()`**: Round a data.frame of numbers (usually the result of `adorn_percentages`), either using the base R `round()` function or using janitor's `round_half_up()` to round all ties up ([thanks, StackOverflow](https://stackoverflow.com/a/12688836/4470365)).
- + e.g., round 10.5 up to 11, consistent with Excel's tie-breaking behavior.
- + This contrasts with rounding 10.5 down to 10 as in base R's `round(10.5)`.
- + `adorn_rounding()` returns columns of class `numeric`, allowing for graphing, sorting, etc. It's a less-aggressive substitute for `adorn_pct_formatting()`; these two functions should not be called together.
-+ **`adorn_ns()`**: add Ns to a tabyl. These can be drawn from the tabyl's underlying counts, which are attached to the tabyl as metadata, or they can be supplied by the user.
-+ **`adorn_title()`**: add a title to a tabyl (or other data.frame). Options include putting the column title in a new row on top of the data.frame or combining the row and column titles in the data.frame's first name slot.
-These adornments should be called in a logical order, e.g., you probably want to add totals before percentages are calculated. In general, call them in the order they appear above.
-## BYOt (Bring Your Own tabyl)
-You can also call `adorn_` functions on other data.frames, not only the results of calls to `tabyl()`. E.g., `mtcars %>% adorn_totals("col") %>% adorn_percentages("col")` performs as expected, despite `mtcars` not being a `tabyl`.
-This can be handy when you have a data.frame that is not a simple tabulation generated by `tabyl` but would still benefit from the `adorn_` formatting functions.
-A simple example: calculate the proportion of records meeting a certain condition, then format the results.
-```{r first_non_tabyl}
-percent_above_165_cm <- humans %>%
- group_by(gender) %>%
- summarise(pct_above_165_cm = mean(height > 165, na.rm = TRUE), .groups = "drop")
-percent_above_165_cm %>%
- adorn_pct_formatting()
-You can control which columns are adorned by using the `...` argument. It accepts the [tidyselect helpers](https://r4ds.had.co.nz/transform.html#select). That is, you can specify columns the same way you would using `dplyr::select()`.
-For instance, say you have a numeric column that should not be included in percentage formatting and you wish to exempt it. Here, only the `proportion` column is adorned:
-```{r tidyselect, warning = FALSE, message = FALSE}
-mtcars %>%
- count(cyl, gear) %>%
- rename(proportion = n) %>%
- adorn_percentages("col", na.rm = TRUE, proportion) %>%
- adorn_pct_formatting(, , , proportion) # the commas say to use the default values of the other arguments
-Here we specify that only two consecutive numeric columns should be totaled (`year` is numeric but should not be included):
-```{r dont_total, warning = FALSE, message = FALSE}
-cases <- data.frame(
- region = c("East", "West"),
- year = 2015,
- recovered = c(125, 87),
- died = c(13, 12)
-cases %>%
- adorn_totals(c("col", "row"), fill = "-", na.rm = TRUE, name = "Total Cases", recovered:died)
-Here's a more complex example that uses a data.frame of means, not counts. We create a table containing the mean of a 3rd variable when grouped by two other variables, then use `adorn_` functions to round the values and append Ns. The first part is pretty straightforward:
-```{r more_non_tabyls, warning = FALSE, message = FALSE}
-library(tidyr) # for pivot_wider()
-mpg_by_cyl_and_am <- mtcars %>%
- group_by(cyl, am) %>%
- summarise(mpg = mean(mpg), .groups = "drop") %>%
- pivot_wider(names_from = am, values_from = mpg)
-Now to `adorn_` it. Since this is not the result of a `tabyl()` call, it doesn't have the underlying Ns stored in the `core` attribute, so we'll have to supply them:
-```{r add_the_Ns}
-mpg_by_cyl_and_am %>%
- adorn_rounding() %>%
- adorn_ns(
- ns = mtcars %>% # calculate the Ns on the fly by calling tabyl on the original data
- tabyl(cyl, am)
- ) %>%
- adorn_title("combined", row_name = "Cylinders", col_name = "Is Automatic")
-If needed, Ns can be manipulated in their own data.frame before they are appended. Here a tabyl with values in the thousands has its Ns formatted to include the separating character `,` as typically seen in American numbers, e.g., `3,000`.
-First we create the tabyl to adorn:
-```{r formatted_Ns_thousands_prep}
-raw_data <- data.frame(
- sex = rep(c("m", "f"), 3000),
- age = round(runif(3000, 1, 102), 0)
-raw_data$agegroup <- cut(raw_data$age, quantile(raw_data$age, c(0, 1 / 3, 2 / 3, 1)))
-comparison <- raw_data %>%
- tabyl(agegroup, sex, show_missing_levels = FALSE) %>%
- adorn_totals(c("row", "col")) %>%
- adorn_percentages("col") %>%
- adorn_pct_formatting(digits = 1)
-At this point, the Ns are unformatted:
-```{r adorn_ns_unformatted}
-comparison %>%
- adorn_ns()
-Now we format them to insert the thousands commas. A tabyl's raw Ns are stored in its `"core"` attribute. Here we retrieve those with `attr()`, then apply the base R function `format()` to all numeric columns. Lastly, we append these Ns using `adorn_ns()`.
-```{r formatted_Ns_thousands}
-formatted_ns <- attr(comparison, "core") %>% # extract the tabyl's underlying Ns
- adorn_totals(c("row", "col")) %>% # to match the data.frame we're appending to
- dplyr::mutate(across(where(is.numeric), ~ format(.x, big.mark = ",")))
-comparison %>%
- adorn_ns(position = "rear", ns = formatted_ns)
-### Questions? Comments?
-File [an issue on GitHub](https://github.com/sfirke/janitor/issues) if you have suggestions related to `tabyl()` and its `adorn_` helpers or encounter problems while using them.
-tabyls: a tidy, fully-featured approach to counting things
-## Motivation: why tabyl?
-Analysts do a lot of counting. Indeed, it’s been said that “data science
-is mostly counting things.” But the base R function for counting,
-`table()`, leaves much to be desired:
-- It doesn’t accept data.frame inputs (and thus doesn’t play nicely with
- the `%>%` pipe)
-- It doesn’t output data.frames
-- Its results are hard to format. Compare the look and formatting
- choices of an R table to a Microsoft Excel PivotTable or even the
- table formatting provided by SPSS.
-`tabyl()` is an approach to tabulating variables that addresses these
-shortcomings. It’s part of the janitor package because counting is such
-a fundamental part of data cleaning and exploration.
-`tabyl()` is tidyverse-aligned and is primarily built upon the dplyr and
-tidyr packages.
-## How it works
-On its surface, `tabyl()` produces frequency tables using 1, 2, or 3
-variables. Under the hood, `tabyl()` also attaches a copy of these
-counts as an attribute of the resulting data.frame.
-The result looks like a basic data.frame of counts, but because it’s
-also a `tabyl` containing this metadata, you can use `adorn_` functions
-to add additional information and pretty formatting.
-The `adorn_` functions are built to work on `tabyls`, but have been
-adapted to work with similar, non-tabyl data.frames that need
-formatting.
-# Examples
-This vignette demonstrates `tabyl` in the context of studying humans in
-the `starwars` dataset from dplyr:
-``` r
-humans <- starwars %>%
- filter(species == "Human")
-## One-way tabyl
-Tabulating a single variable is the simplest kind of tabyl:
-``` r
-t1 <- humans %>%
- tabyl(eye_color)
-#> eye_color n percent
-#> blue 12 0.34285714
-#> blue-gray 1 0.02857143
-#> brown 17 0.48571429
-#> dark 1 0.02857143
-#> hazel 2 0.05714286
-#> yellow 2 0.05714286
-When `NA` values are present, `tabyl()` also displays “valid”
-percentages, i.e., with missing values removed from the denominator. And
-while `tabyl()` is built to take a data.frame and column names, you can
-also produce a one-way tabyl by calling it directly on a vector:
-``` r
-x <- c("big", "big", "small", "small", "small", NA)
-#> x n percent valid_percent
-#> big 2 0.3333333 0.4
-#> small 3 0.5000000 0.6
-#> 1 0.1666667 NA
-Most `adorn_` helper functions are built for 2-way tabyls, but those
-that make sense for a 1-way tabyl do work:
-``` r
-t1 %>%
- adorn_totals("row") %>%
- adorn_pct_formatting()
-#> eye_color n percent
-#> blue 12 34.3%
-#> blue-gray 1 2.9%
-#> brown 17 48.6%
-#> dark 1 2.9%
-#> hazel 2 5.7%
-#> yellow 2 5.7%
-#> Total 35 100.0%
-## Two-way tabyl
-This is often called a “crosstab” or “contingency” table. Calling
-`tabyl` on two columns of a data.frame produces the same result as the
-common combination of `dplyr::count()`, followed by
-`tidyr::pivot_wider()` to wide form:
-``` r
-t2 <- humans %>%
- tabyl(gender, eye_color)
-#> gender blue blue-gray brown dark hazel yellow
-#> feminine 3 0 5 0 1 0
-#> masculine 9 1 12 1 1 2
-Since it’s a `tabyl`, we can enhance it with `adorn_` helper functions.
-For instance:
-``` r
-t2 %>%
- adorn_percentages("row") %>%
- adorn_pct_formatting(digits = 2) %>%
- adorn_ns()
-#> gender blue blue-gray brown dark hazel yellow
-#> feminine 33.33% (3) 0.00% (0) 55.56% (5) 0.00% (0) 11.11% (1) 0.00% (0)
-#> masculine 34.62% (9) 3.85% (1) 46.15% (12) 3.85% (1) 3.85% (1) 7.69% (2)
-Adornments have options to control axes, rounding, and other relevant
-formatting choices (more on that below).
-## Three-way tabyl
-Just as `table()` accepts three variables, so does `tabyl()`, producing
-a list of tabyls:
-``` r
-t3 <- humans %>%
- tabyl(eye_color, skin_color, gender)
-# the result is a tabyl of eye color x skin color, split into a list by gender
-#> $feminine
-#> eye_color dark fair light pale tan white
-#> blue 0 2 1 0 0 0
-#> blue-gray 0 0 0 0 0 0
-#> brown 0 1 4 0 0 0
-#> dark 0 0 0 0 0 0
-#> hazel 0 0 1 0 0 0
-#> yellow 0 0 0 0 0 0
-#> $masculine
-#> eye_color dark fair light pale tan white
-#> blue 0 7 2 0 0 0
-#> blue-gray 0 1 0 0 0 0
-#> brown 3 4 3 0 2 0
-#> dark 1 0 0 0 0 0
-#> hazel 0 1 0 0 0 0
-#> yellow 0 0 0 1 0 1
-If the `adorn_` helper functions are called on a list of data.frames -
-like the output of a three-way `tabyl` call - they will call
-`purrr::map()` to apply themselves to each data.frame in the list:
-``` r
-humans %>%
- tabyl(eye_color, skin_color, gender, show_missing_levels = FALSE) %>%
- adorn_totals("row") %>%
- adorn_percentages("all") %>%
- adorn_pct_formatting(digits = 1) %>%
- adorn_ns %>%
- adorn_title
-#> $feminine
-#> skin_color
-#> eye_color fair light
-#> blue 22.2% (2) 11.1% (1)
-#> brown 11.1% (1) 44.4% (4)
-#> hazel 0.0% (0) 11.1% (1)
-#> Total 33.3% (3) 66.7% (6)
-#> $masculine
-#> skin_color
-#> eye_color dark fair light pale tan white
-#> blue 0.0% (0) 26.9% (7) 7.7% (2) 0.0% (0) 0.0% (0) 0.0% (0)
-#> blue-gray 0.0% (0) 3.8% (1) 0.0% (0) 0.0% (0) 0.0% (0) 0.0% (0)
-#> brown 11.5% (3) 15.4% (4) 11.5% (3) 0.0% (0) 7.7% (2) 0.0% (0)
-#> dark 3.8% (1) 0.0% (0) 0.0% (0) 0.0% (0) 0.0% (0) 0.0% (0)
-#> hazel 0.0% (0) 3.8% (1) 0.0% (0) 0.0% (0) 0.0% (0) 0.0% (0)
-#> yellow 0.0% (0) 0.0% (0) 0.0% (0) 3.8% (1) 0.0% (0) 3.8% (1)
-#> Total 15.4% (4) 50.0% (13) 19.2% (5) 3.8% (1) 7.7% (2) 3.8% (1)
-This automatic mapping supports interactive data analysis that switches
-between combinations of 2 and 3 variables. That way, if a user starts
-with `humans %>% tabyl(eye_color, skin_color)`, adds some `adorn_`
-calls, then decides to split the tabulation by gender and modifies their
-first line to `humans %>% tabyl(eye_color, skin_color, gender)`, they
-don’t have to rewrite the subsequent adornment calls to use `map()`.
-However, if it feels more natural to call these with `map()` or `lapply()`,
-that is still supported. For instance,
-`t3 %>% lapply(adorn_percentages)` would produce the same result as
-`t3 %>% adorn_percentages`.
-### Other features of tabyls
-- When called on a factor, `tabyl` will show missing levels (levels not
- present in the data) in the result
- - This can be suppressed if not desired
-- `NA` values can be displayed or suppressed
-- `tabyls` print without displaying row numbers
-You can call `chisq.test()` and `fisher.test()` on a two-way tabyl to
-perform those statistical tests, just like on a base R `table()` object.
-## The `adorn_*` functions
-These modular functions build on a `tabyl` to approximate the
-functionality of a PivotTable in Microsoft Excel. They print elegant
-results for interactive analysis or for sharing in a report, e.g., with
-`knitr::kable()`. For example:
-``` r
-humans %>%
- tabyl(gender, eye_color) %>%
- adorn_totals(c("row", "col")) %>%
- adorn_percentages("row") %>%
- adorn_pct_formatting(rounding = "half up", digits = 0) %>%
- adorn_ns() %>%
- adorn_title("combined") %>%
- knitr::kable()
-| gender/eye_color | blue | blue-gray | brown | dark | hazel | yellow | Total |
-| feminine | 33% (3) | 0% (0) | 56% (5) | 0% (0) | 11% (1) | 0% (0) | 100% (9) |
-| masculine | 35% (9) | 4% (1) | 46% (12) | 4% (1) | 4% (1) | 8% (2) | 100% (26) |
-| Total | 34% (12) | 3% (1) | 49% (17) | 3% (1) | 6% (2) | 6% (2) | 100% (35) |
-### The adorn functions are:
-- **`adorn_totals()`**: Add totals row, column, or both.
-- **`adorn_percentages()`**: Calculate percentages along either axis or
- over the entire tabyl
-- **`adorn_pct_formatting()`**: Format percentage columns, controlling
- the number of digits to display and whether to append the `%` symbol
-- **`adorn_rounding()`**: Round a data.frame of numbers (usually the
- result of `adorn_percentages`), either using the base R `round()`
- function or using janitor’s `round_half_up()` to round all ties up
- ([thanks,
- StackOverflow](https://stackoverflow.com/a/12688836/4470365)).
- - e.g., round 10.5 up to 11, consistent with Excel’s tie-breaking
- behavior.
- - This contrasts with rounding 10.5 down to 10 as in base R’s
- `round(10.5)`.
- - `adorn_rounding()` returns columns of class `numeric`, allowing for
- graphing, sorting, etc. It’s a less-aggressive substitute for
- `adorn_pct_formatting()`; these two functions should not be called
- together.
-- **`adorn_ns()`**: add Ns to a tabyl. These can be drawn from the
- tabyl’s underlying counts, which are attached to the tabyl as
- metadata, or they can be supplied by the user.
-- **`adorn_title()`**: add a title to a tabyl (or other data.frame).
- Options include putting the column title in a new row on top of the
- data.frame or combining the row and column titles in the data.frame’s
- first name slot.
-These adornments should be called in a logical order, e.g., you probably
-want to add totals before percentages are calculated. In general, call
-them in the order they appear above.
-## BYOt (Bring Your Own tabyl)
-You can also call `adorn_` functions on other data.frames, not only the
-results of calls to `tabyl()`. E.g.,
-`mtcars %>% adorn_totals("col") %>% adorn_percentages("col")` performs
-as expected, despite `mtcars` not being a `tabyl`.
-This can be handy when you have a data.frame that is not a simple
-tabulation generated by `tabyl` but would still benefit from the
-`adorn_` formatting functions.
-A simple example: calculate the proportion of records meeting a certain
-condition, then format the results.
-``` r
-percent_above_165_cm <- humans %>%
- group_by(gender) %>%
- summarise(pct_above_165_cm = mean(height > 165, na.rm = TRUE), .groups = "drop")
-percent_above_165_cm %>%
- adorn_pct_formatting()
-#> # A tibble: 2 × 2
-#> gender pct_above_165_cm
-#> 1 feminine 12.5%
-#> 2 masculine 100.0%
-You can control which columns are adorned by using the `...` argument.
-It accepts the [tidyselect
-helpers](https://r4ds.had.co.nz/transform.html#select). That is, you can
-specify columns the same way you would using `dplyr::select()`.
-For instance, say you have a numeric column that should not be included
-in percentage formatting and you wish to exempt it. Here, only the
-`proportion` column is adorned:
-``` r
-mtcars %>%
- count(cyl, gear) %>%
- rename(proportion = n) %>%
- adorn_percentages("col", na.rm = TRUE, proportion) %>%
- adorn_pct_formatting(,,,proportion) # the commas say to use the default values of the other arguments
-#> cyl gear proportion
-#> 4 3 3.1%
-#> 4 4 25.0%
-#> 4 5 6.2%
-#> 6 3 6.2%
-#> 6 4 12.5%
-#> 6 5 3.1%
-#> 8 3 37.5%
-#> 8 5 6.2%
-Here we specify that only two consecutive numeric columns should be
-totaled (`year` is numeric but should not be included):
-``` r
-cases <- data.frame(
- region = c("East", "West"),
- year = 2015,
- recovered = c(125, 87),
- died = c(13, 12)
-cases %>%
- adorn_totals(c("col", "row"), fill = "-", na.rm = TRUE, name = "Total Cases", recovered:died)
-#> region year recovered died Total Cases
-#> East 2015 125 13 138
-#> West 2015 87 12 99
-#> Total Cases - 212 25 237
-Here’s a more complex example that uses a data.frame of means, not
-counts. We create a table containing the mean of a 3rd variable when
-grouped by two other variables, then use `adorn_` functions to round the
-values and append Ns. The first part is pretty straightforward:
-``` r
-library(tidyr) # for pivot_wider()
-mpg_by_cyl_and_am <- mtcars %>%
- group_by(cyl, am) %>%
- summarise(mpg = mean(mpg), .groups = "drop") %>%
- pivot_wider(names_from = am, values_from = mpg)
-#> # A tibble: 3 × 3
-#> cyl `0` `1`
-#> 1 4 22.9 28.1
-#> 2 6 19.1 20.6
-#> 3 8 15.0 15.4
-Now to `adorn_` it. Since this is not the result of a `tabyl()` call, it
-doesn’t have the underlying Ns stored in the `core` attribute, so we’ll
-have to supply them:
-``` r
-mpg_by_cyl_and_am %>%
- adorn_rounding() %>%
- adorn_ns(
- ns = mtcars %>% # calculate the Ns on the fly by calling tabyl on the original data
- tabyl(cyl, am)
- ) %>%
- adorn_title("combined", row_name = "Cylinders", col_name = "Is Automatic")
-#> Cylinders/Is Automatic 0 1
-#> 1 4 22.9 (3) 28.1 (8)
-#> 2 6 19.1 (4) 20.6 (3)
-#> 3 8 15.1 (12) 15.4 (2)
-If needed, Ns can be manipulated in their own data.frame before they are
-appended. Here a tabyl with values in the thousands has its Ns formatted
-to include the separating character `,` as typically seen in American
-numbers, e.g., `3,000`.
-First we create the tabyl to adorn:
-``` r
-raw_data <- data.frame(sex = rep(c("m", "f"), 3000),
- age = round(runif(3000, 1, 102), 0))
-raw_data$agegroup = cut(raw_data$age, quantile(raw_data$age, c(0, 1/3, 2/3, 1)))
-comparison <- raw_data %>%
- tabyl(agegroup, sex, show_missing_levels = FALSE) %>%
- adorn_totals(c("row", "col")) %>%
- adorn_percentages("col") %>%
- adorn_pct_formatting(digits = 1)
-#> agegroup f m Total
-#> (1,34] 33.9% 32.3% 33.1%
-#> (34,68] 33.0% 33.7% 33.4%
-#> (68,102] 32.7% 33.3% 33.0%
-#> 0.4% 0.6% 0.5%
-#> Total 100.0% 100.0% 100.0%
-At this point, the Ns are unformatted:
-``` r
-comparison %>%
- adorn_ns()
-#> agegroup f m Total
-#> (1,34] 33.9% (1,018) 32.3% (970) 33.1% (1,988)
-#> (34,68] 33.0% (990) 33.7% (1,012) 33.4% (2,002)
-#> (68,102] 32.7% (980) 33.3% (1,000) 33.0% (1,980)
-#> 0.4% (12) 0.6% (18) 0.5% (30)
-#> Total 100.0% (3,000) 100.0% (3,000) 100.0% (6,000)
-Now we format them to insert the thousands commas. A tabyl’s raw Ns are
-stored in its `"core"` attribute. Here we retrieve those with `attr()`,
-then apply the base R function `format()` to all numeric columns.
-Lastly, we append these Ns using `adorn_ns()`.
-``` r
-formatted_ns <- attr(comparison, "core") %>% # extract the tabyl's underlying Ns
- adorn_totals(c("row", "col")) %>% # to match the data.frame we're appending to
- dplyr::mutate(across(where(is.numeric), ~ format(.x, big.mark = ",")))
-comparison %>%
- adorn_ns(position = "rear", ns = formatted_ns)
-#> agegroup f m Total
-#> (1,34] 33.9% (1,018) 32.3% ( 970) 33.1% (1,988)
-#> (34,68] 33.0% ( 990) 33.7% (1,012) 33.4% (2,002)
-#> (68,102] 32.7% ( 980) 33.3% (1,000) 33.0% (1,980)
-#> 0.4% ( 12) 0.6% ( 18) 0.5% ( 30)
-#> Total 100.0% (3,000) 100.0% (3,000) 100.0% (6,000)
-### Questions? Comments?
-File [an issue on GitHub](https://github.com/sfirke/janitor/issues) if
-you have suggestions related to `tabyl()` and its `adorn_` helpers or
-encounter problems while using them.