Skip to content

Commit

Permalink
feat: group by colors and join
Browse files Browse the repository at this point in the history
  • Loading branch information
agouy committed Nov 11, 2024
1 parent f9d5490 commit 11401bc
Showing 1 changed file with 55 additions and 25 deletions.
80 changes: 55 additions & 25 deletions main.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,8 @@ suppressPackageStartupMessages({
library(dplyr, warn.conflicts = FALSE)
})

# Define the bin_data function
bin_data <- function(values, method, n_bins) {

# Bin the data based on the selected method and number of bins
if (method == "Equal Width") {
breaks <- seq(min(values), max(values), length.out = n_bins + 1)
} else if (method == "Quantiles") {
Expand All @@ -18,18 +16,15 @@ bin_data <- function(values, method, n_bins) {
breaks <- pretty(values, n = n_bins)
}

# Use the cut function to bin the data and create a new column
binned_col <- cut(values, breaks, include.lowest = TRUE)

# Return the updated data frame

return(binned_col)
}

bin_data_2d <- function(df, method, n_bins_x, n_bins_y) {
bin_data_2d <- function(df, method, n_bins_x, n_bins_y, group = FALSE) {
x_values <- df$.x
y_values <- df$.y

# Define breaks for x-axis based on the selected method and number of bins
if (method == "Equal Width") {
breaks_x <- seq(min(x_values), max(x_values), length.out = n_bins_x + 1)
breaks_y <- seq(min(y_values), max(y_values), length.out = n_bins_y + 1)
Expand All @@ -41,53 +36,68 @@ bin_data_2d <- function(df, method, n_bins_x, n_bins_y) {
breaks_y <- pretty(y_values, n = n_bins_y)
}

# Use the cut function to bin data along each axis
binned_x <- cut(x_values, breaks_x, include.lowest = TRUE)
binned_y <- cut(y_values, breaks_y, include.lowest = TRUE)

# Combine the binned values to create unique bin identifiers for each 2D cell
binned_2d <- interaction(binned_x, binned_y, drop = TRUE)

x_df <- tibble(x_bin = levels(binned_x)) %>% mutate(x_bin_id = seq_len(nrow(.)))
y_df <- tibble(y_bin = levels(binned_y)) %>% mutate(y_bin_id = seq_len(nrow(.)))
xy_df <- tibble(xy_bin = levels(binned_2d)) %>% mutate(xy_bin_id = seq_len(nrow(.)))

if(!group) df$.colorLevels <- 0L

obs_df <- tibble(
.ci = df$.ci,
.ri = df$.ri,
.x_bin_id = as.numeric(binned_x),
.y_bin_id = as.numeric(binned_y),
.xy_bin_id = as.numeric(binned_2d)
.xy_bin_id = as.numeric(binned_2d),
.colorLevels = df$.colorLevels
) %>%
ctx$addNamespace()

x_df <- left_join(
x_df,
obs_df %>%
group_by(.x_bin_id) %>%
group_by(.x_bin_id, .colorLevels) %>%
summarise(x_count = as.numeric(n())) %>%
mutate(x_prop = x_count / sum(x_count)),
by = c("x_bin_id" = ".x_bin_id")
) %>%
ctx$addNamespace()

y_df <- left_join(
y_df,
obs_df %>%
group_by(.y_bin_id) %>%
group_by(.y_bin_id, .colorLevels) %>%
summarise(y_count = as.numeric(n())) %>%
mutate(y_prop = y_count / sum(y_count)),
by = c("y_bin_id" = ".y_bin_id")
) %>%
ctx$addNamespace()

xy_df <- left_join(
xy_df,
obs_df %>%
group_by(.xy_bin_id) %>%
group_by(.xy_bin_id, .colorLevels) %>%
summarise(xy_count = as.numeric(n())) %>%
mutate(xy_prop = xy_count / sum(xy_count)),
by = c("xy_bin_id" = ".xy_bin_id")
) %>%
ctx$addNamespace()
# Return the 2D binned data

if(!group) {
obs_df <- select(obs_df, -.colorLevels)
x_df <- select(x_df, -.colorLevels)
y_df <- select(y_df, -.colorLevels)
xy_df <- select(xy_df, -.colorLevels)
} else {
x_df <- rename(x_df, .x_colorLevels = .colorLevels)
y_df <- rename(y_df, .y_colorLevels = .colorLevels)
xy_df <- rename(xy_df, .xy_colorLevels = .colorLevels)
}

return(list(obs_df = obs_df, x_df = x_df, y_df = y_df, xy_df = xy_df))
}

Expand All @@ -98,19 +108,39 @@ n_bins <- ctx$op.value("n_bins", as.double, 10)
if(ctx$hasNumericXAxis) {
to_select <- c(".y", ".x", ".ci", ".ri")

out_list <- ctx$select(to_select) %>%
bin_data_2d(method = method, n_bins_x = n_bins, n_bins_y = n_bins)

out_rel <- out_list$obs_df %>%
as_relation() %>%
left_join_relation(ctx$crelation, ".ci", ctx$crelation$rids) %>%
left_join_relation(ctx$rrelation, ".ri", ctx$rrelation$rids) %>%
left_join_relation(as_relation(out_list$x_df), ".x_bin_id", paste0(ctx$namespace, ".x_bin_id")) %>%
left_join_relation(as_relation(out_list$y_df), ".y_bin_id", paste0(ctx$namespace, ".y_bin_id")) %>%
left_join_relation(as_relation(out_list$xy_df), ".xy_bin_id", paste0(ctx$namespace, ".xy_bin_id")) %>%
as_join_operator(c(ctx$cnames, ctx$rnames), c(ctx$cnames, ctx$rnames))
if(length(ctx$colors) > 0) {

out_list <- ctx$select(c(to_select, ".colorLevels")) %>%
bin_data_2d(method = method, n_bins_x = n_bins, n_bins_y = n_bins, group = TRUE)

out_rel <- out_list$obs_df %>%
as_relation() %>%
left_join_relation(ctx$crelation, ".ci", ctx$crelation$rids) %>%
left_join_relation(ctx$rrelation, ".ri", ctx$rrelation$rids) %>%
left_join_relation(as_relation(ctx$schema), ".colorLevels", ".colorLevels") %>%
left_join_relation(as_relation(out_list$x_df), list(".colorLevels", ".x_bin_id"), list(".x_colorLevels", paste0(ctx$namespace, ".x_bin_id"))) %>%
left_join_relation(as_relation(out_list$y_df), list(".colorLevels", ".y_bin_id"), list(".y_colorLevels", paste0(ctx$namespace, ".y_bin_id"))) %>%
left_join_relation(as_relation(out_list$xy_df), list(".colorLevels", ".xy_bin_id"), list(".xy_colorLevels", paste0(ctx$namespace, ".xy_bin_id"))) %>%
as_join_operator(c(ctx$cnames, ctx$rnames, ctx$colors), c(ctx$cnames, ctx$rnames, ctx$colors))

} else {

out_list <- ctx$select(to_select) %>%
bin_data_2d(method = method, n_bins_x = n_bins, n_bins_y = n_bins)

out_rel <- out_list$obs_df %>%
as_relation() %>%
left_join_relation(ctx$crelation, ".ci", ctx$crelation$rids) %>%
left_join_relation(ctx$rrelation, ".ri", ctx$rrelation$rids) %>%
left_join_relation(as_relation(out_list$x_df), ".x_bin_id", paste0(ctx$namespace, ".x_bin_id")) %>%
left_join_relation(as_relation(out_list$y_df), ".y_bin_id", paste0(ctx$namespace, ".y_bin_id")) %>%
left_join_relation(as_relation(out_list$xy_df), ".xy_bin_id", paste0(ctx$namespace, ".xy_bin_id")) %>%
as_join_operator(c(ctx$cnames, ctx$rnames), c(ctx$cnames, ctx$rnames))

}

save_relation(out_rel, ctx)


} else {
to_select <- c(".y", ".ci", ".ri")
Expand Down

0 comments on commit 11401bc

Please sign in to comment.