Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tabyl label #575

Merged
merged 11 commits into from
Jun 27, 2024
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ importFrom(lubridate,second)
importFrom(lubridate,ymd)
importFrom(lubridate,ymd_hms)
importFrom(magrittr,"%>%")
importFrom(rlang,"%||%")
importFrom(rlang,dots_n)
importFrom(rlang,expr)
importFrom(rlang,syms)
Expand Down
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ These are all minor breaking changes resulting from enhancements and are not exp

## New features

* `tabyl()` now defaults to displaying the label attribute for the column name (@olivroy, #394).

olivroy marked this conversation as resolved.
Show resolved Hide resolved
* A new function `paste_skip_na()` pastes without including NA values (#537).

* `row_to_names()` now accepts multiple rows as input, and merges them using a new `sep` argument (#536). The default is `sep = "_"`. When handling multiple `NA` values, `row_to_names()` ignores them and only merges non-NA values for column names. When all values are `NA`, `row_to_names()` creates a column name of `"NA"`, a character, rather than `NA`.
Expand Down
2 changes: 1 addition & 1 deletion R/get_dupes.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
#' mtcars %>% get_dupes(-c(wt, qsec))
#' mtcars %>% get_dupes(starts_with("cy"))
#' @importFrom tidyselect eval_select
#' @importFrom rlang expr dots_n syms
#' @importFrom rlang expr dots_n syms %||%
get_dupes <- function(dat, ...) {
expr <- rlang::expr(c(...))
pos <- tidyselect::eval_select(expr, data = dat)
Expand Down
1 change: 0 additions & 1 deletion R/print_tabyl.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
#' @export

print.tabyl <- function(x, ...) {
print.data.frame(x, row.names = FALSE)
}
34 changes: 26 additions & 8 deletions R/tabyl.R
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,6 @@ tabyl.default <- function(dat, show_na = TRUE, show_missing_levels = TRUE, ...)
var_name <- names(dat)
}


# useful error message if input vector doesn't exist
if (is.null(dat)) {
stop(paste0("object ", var_name, " not found"))
Expand All @@ -76,6 +75,13 @@ tabyl.default <- function(dat, show_na = TRUE, show_missing_levels = TRUE, ...)
var_name <- paste(var_name, collapse = "")
}

# Try to retrieve label
if (is.data.frame(dat)) {
var_label <- attr(dat[, var_name], "label", exact = TRUE) %||% var_name
} else {
var_label <- attr(dat, "label", exact = TRUE) %||% var_name
}

# if show_na is not length-1 logical, error helpfully (#377)
if (length(show_na) > 1 || !inherits(show_na, "logical")) {
stop("The value supplied to the \"show_na\" argument must be TRUE or FALSE.\n\nDid you try to call tabyl on two vectors, like tabyl(data$var1, data$var2) ? To create a two-way tabyl, the two vectors must be in the same data.frame, and the function should be called like this: \n
Expand Down Expand Up @@ -133,8 +139,8 @@ tabyl.default <- function(dat, show_na = TRUE, show_missing_levels = TRUE, ...)
dplyr::mutate(percent = n / sum(n, na.rm = TRUE)) # recalculate % without NAs
}

# reassign correct variable name
names(result)[1] <- var_name
# reassign correct variable name (or label if it exists)
names(result)[1] <- var_label

# in case input var name was "n" or "percent", call helper function to set unique names
result <- handle_if_special_names_used(result)
Expand Down Expand Up @@ -238,10 +244,11 @@ tabyl_2way <- function(dat, var1, var2, show_na = TRUE, show_missing_levels = TR
result <- result[c(setdiff(names(result), "NA_"), "NA_")]
}


result %>%
data.frame(., check.names = FALSE) %>%
as_tabyl(axes = 2, row_var_name = names(dat)[1], col_var_name = names(dat)[2])
row_var_name <- names(dat)[1]
col_var_name <- names(dat)[2]
names(result)[1] <- attr(dat[, 1], "label", exact = TRUE) %||% names(result)[1]
data.frame(result, check.names = FALSE) %>%
as_tabyl(axes = 2, row_var_name = row_var_name, col_var_name = col_var_name)
}


Expand All @@ -250,6 +257,10 @@ tabyl_3way <- function(dat, var1, var2, var3, show_na = TRUE, show_missing_level
dat <- dplyr::select(dat, !!var1, !!var2, !!var3)
var3_numeric <- is.numeric(dat[[3]])

# Preserve labels, as attributes are sometimes dropped during transformations.
var1_label <- attr(dat[, 1], "label", exact = TRUE)
var2_label <- attr(dat[, 2], "label", exact = TRUE)

# Keep factor levels for ordering the list at the end
if (is.factor(dat[[3]])) {
third_levels_for_sorting <- levels(dat[[3]])
Expand Down Expand Up @@ -277,7 +288,14 @@ tabyl_3way <- function(dat, var1, var2, var3, show_na = TRUE, show_missing_level
dat[[2]] <- as.factor(dat[[2]])
}

result <- split(dat, dat[[rlang::quo_name(var3)]]) %>%
result <- split(dat, dat[[rlang::quo_name(var3)]])
# split() drops attributes, so we manually add back the label attributes.
result <- lapply(result, function(x) {
attr(x[[1]], "label") <- var1_label
attr(x[[2]], "label") <- var2_label
x
})
result <- result %>%
purrr::map(tabyl_2way, var1, var2, show_na = show_na, show_missing_levels = show_missing_levels) %>%
purrr::map(reset_1st_col_status, col1_class, col1_levels) # reset class of var in 1st col to its input class, #168

Expand Down
11 changes: 11 additions & 0 deletions tests/testthat/test-tabyl.R
Original file line number Diff line number Diff line change
Expand Up @@ -395,6 +395,17 @@ test_that("3-way tabyl with 3rd var factor is listed in right order, #250", {
expect_equal(names(tabyl(z, am, gear, cyl)), c("8", "6", "NA_"))
})

test_that("tabyl works with label attributes (#394)", {
mt_label <- mtcars
attr(mt_label$cyl, "label") <- "Number of cyl"
tab <- tabyl(mt_label, cyl)
expect_named(tab, c("Number of cyl", "n", "percent"))
tab2 <- tabyl(mt_label, cyl, am)
expect_named(tab2, c("Number of cyl", "0", "1"))
tab3 <- tabyl(mt_label, cyl, am, vs)
expect_equal(names(tab3[[1]])[1], "Number of cyl")
})

test_that("tabyl works with ordered 1st variable, #386", {
mt_ordered <- mtcars
mt_ordered$cyl <- ordered(mt_ordered$cyl, levels = c("4", "8", "6"))
Expand Down
Loading