Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tabyl label #575

Merged
merged 11 commits into from
Jun 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ importFrom(lubridate,second)
importFrom(lubridate,ymd)
importFrom(lubridate,ymd_hms)
importFrom(magrittr,"%>%")
importFrom(rlang,"%||%")
importFrom(rlang,dots_n)
importFrom(rlang,expr)
importFrom(rlang,syms)
Expand Down
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ These are all minor breaking changes resulting from enhancements and are not exp

* When using `row_to_names()`, when all input values in `row_number` for a column are `NA`, `row_to_names()` creates a column name of `"NA"`, a character, rather than `NA`. If code previously used relied on a column name of `NA`, it will now error. To fix this, rely on a column name of `"NA"`.

* When `tabyl()` is called on a data.frame containing labels, it now displays the label attribute as the name of the first column in the the resulting `tabyl` object (@olivroy, #394). This may break subsequent code that refers to the output of such a `tabyl` by column name. To maintain the previous behavior of ignoring variable labels, you can remove the labels with a function like `haven::zap_labels()` or `labelled::remove_labels()` before calling `tabyl()`.


## New features

* A new function `paste_skip_na()` pastes without including NA values (#537).
Expand Down
2 changes: 1 addition & 1 deletion R/get_dupes.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
#' mtcars %>% get_dupes(-c(wt, qsec))
#' mtcars %>% get_dupes(starts_with("cy"))
#' @importFrom tidyselect eval_select
#' @importFrom rlang expr dots_n syms
#' @importFrom rlang expr dots_n syms %||%
get_dupes <- function(dat, ...) {
expr <- rlang::expr(c(...))
pos <- tidyselect::eval_select(expr, data = dat)
Expand Down
1 change: 0 additions & 1 deletion R/print_tabyl.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
#' @export

print.tabyl <- function(x, ...) {
print.data.frame(x, row.names = FALSE)
}
34 changes: 26 additions & 8 deletions R/tabyl.R
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,6 @@ tabyl.default <- function(dat, show_na = TRUE, show_missing_levels = TRUE, ...)
var_name <- names(dat)
}


# useful error message if input vector doesn't exist
if (is.null(dat)) {
stop(paste0("object ", var_name, " not found"))
Expand All @@ -76,6 +75,13 @@ tabyl.default <- function(dat, show_na = TRUE, show_missing_levels = TRUE, ...)
var_name <- paste(var_name, collapse = "")
}

# Try to retrieve label
if (is.data.frame(dat)) {
var_label <- attr(dat[, var_name], "label", exact = TRUE) %||% var_name
} else {
var_label <- attr(dat, "label", exact = TRUE) %||% var_name
}

# if show_na is not length-1 logical, error helpfully (#377)
if (length(show_na) > 1 || !inherits(show_na, "logical")) {
stop("The value supplied to the \"show_na\" argument must be TRUE or FALSE.\n\nDid you try to call tabyl on two vectors, like tabyl(data$var1, data$var2) ? To create a two-way tabyl, the two vectors must be in the same data.frame, and the function should be called like this: \n
Expand Down Expand Up @@ -133,8 +139,8 @@ tabyl.default <- function(dat, show_na = TRUE, show_missing_levels = TRUE, ...)
dplyr::mutate(percent = n / sum(n, na.rm = TRUE)) # recalculate % without NAs
}

# reassign correct variable name
names(result)[1] <- var_name
# reassign correct variable name (or label if it exists)
names(result)[1] <- var_label

# in case input var name was "n" or "percent", call helper function to set unique names
result <- handle_if_special_names_used(result)
Expand Down Expand Up @@ -238,10 +244,11 @@ tabyl_2way <- function(dat, var1, var2, show_na = TRUE, show_missing_levels = TR
result <- result[c(setdiff(names(result), "NA_"), "NA_")]
}


result %>%
data.frame(., check.names = FALSE) %>%
as_tabyl(axes = 2, row_var_name = names(dat)[1], col_var_name = names(dat)[2])
row_var_name <- names(dat)[1]
col_var_name <- names(dat)[2]
names(result)[1] <- attr(dat[, 1], "label", exact = TRUE) %||% names(result)[1]
data.frame(result, check.names = FALSE) %>%
as_tabyl(axes = 2, row_var_name = row_var_name, col_var_name = col_var_name)
}


Expand All @@ -250,6 +257,10 @@ tabyl_3way <- function(dat, var1, var2, var3, show_na = TRUE, show_missing_level
dat <- dplyr::select(dat, !!var1, !!var2, !!var3)
var3_numeric <- is.numeric(dat[[3]])

# Preserve labels, as attributes are sometimes dropped during transformations.
var1_label <- attr(dat[, 1], "label", exact = TRUE)
var2_label <- attr(dat[, 2], "label", exact = TRUE)

# Keep factor levels for ordering the list at the end
if (is.factor(dat[[3]])) {
third_levels_for_sorting <- levels(dat[[3]])
Expand Down Expand Up @@ -277,7 +288,14 @@ tabyl_3way <- function(dat, var1, var2, var3, show_na = TRUE, show_missing_level
dat[[2]] <- as.factor(dat[[2]])
}

result <- split(dat, dat[[rlang::quo_name(var3)]]) %>%
result <- split(dat, dat[[rlang::quo_name(var3)]])
# split() drops attributes, so we manually add back the label attributes.
result <- lapply(result, function(x) {
attr(x[[1]], "label") <- var1_label
attr(x[[2]], "label") <- var2_label
x
})
result <- result %>%
purrr::map(tabyl_2way, var1, var2, show_na = show_na, show_missing_levels = show_missing_levels) %>%
purrr::map(reset_1st_col_status, col1_class, col1_levels) # reset class of var in 1st col to its input class, #168

Expand Down
11 changes: 11 additions & 0 deletions tests/testthat/test-tabyl.R
Original file line number Diff line number Diff line change
Expand Up @@ -395,6 +395,17 @@ test_that("3-way tabyl with 3rd var factor is listed in right order, #250", {
expect_equal(names(tabyl(z, am, gear, cyl)), c("8", "6", "NA_"))
})

test_that("tabyl works with label attributes (#394)", {
mt_label <- mtcars
attr(mt_label$cyl, "label") <- "Number of cyl"
tab <- tabyl(mt_label, cyl)
expect_named(tab, c("Number of cyl", "n", "percent"))
tab2 <- tabyl(mt_label, cyl, am)
expect_named(tab2, c("Number of cyl", "0", "1"))
tab3 <- tabyl(mt_label, cyl, am, vs)
expect_equal(names(tab3[[1]])[1], "Number of cyl")
})

test_that("tabyl works with ordered 1st variable, #386", {
mt_ordered <- mtcars
mt_ordered$cyl <- ordered(mt_ordered$cyl, levels = c("4", "8", "6"))
Expand Down
Loading