From 11401bc9bd4c2ef5a0c26af390fc6448277d0c63 Mon Sep 17 00:00:00 2001 From: agouy Date: Mon, 11 Nov 2024 12:26:10 +0000 Subject: [PATCH] feat: group by colors and join --- main.R | 80 ++++++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 55 insertions(+), 25 deletions(-) diff --git a/main.R b/main.R index f77f7bf..4ed159e 100644 --- a/main.R +++ b/main.R @@ -6,10 +6,8 @@ suppressPackageStartupMessages({ library(dplyr, warn.conflicts = FALSE) }) -# Define the bin_data function bin_data <- function(values, method, n_bins) { - # Bin the data based on the selected method and number of bins if (method == "Equal Width") { breaks <- seq(min(values), max(values), length.out = n_bins + 1) } else if (method == "Quantiles") { @@ -18,18 +16,15 @@ bin_data <- function(values, method, n_bins) { breaks <- pretty(values, n = n_bins) } - # Use the cut function to bin the data and create a new column binned_col <- cut(values, breaks, include.lowest = TRUE) - - # Return the updated data frame + return(binned_col) } -bin_data_2d <- function(df, method, n_bins_x, n_bins_y) { +bin_data_2d <- function(df, method, n_bins_x, n_bins_y, group = FALSE) { x_values <- df$.x y_values <- df$.y - # Define breaks for x-axis based on the selected method and number of bins if (method == "Equal Width") { breaks_x <- seq(min(x_values), max(x_values), length.out = n_bins_x + 1) breaks_y <- seq(min(y_values), max(y_values), length.out = n_bins_y + 1) @@ -41,53 +36,68 @@ bin_data_2d <- function(df, method, n_bins_x, n_bins_y) { breaks_y <- pretty(y_values, n = n_bins_y) } - # Use the cut function to bin data along each axis binned_x <- cut(x_values, breaks_x, include.lowest = TRUE) binned_y <- cut(y_values, breaks_y, include.lowest = TRUE) - # Combine the binned values to create unique bin identifiers for each 2D cell binned_2d <- interaction(binned_x, binned_y, drop = TRUE) x_df <- tibble(x_bin = levels(binned_x)) %>% mutate(x_bin_id = seq_len(nrow(.))) y_df <- tibble(y_bin = levels(binned_y)) %>% mutate(y_bin_id = seq_len(nrow(.))) xy_df <- tibble(xy_bin = levels(binned_2d)) %>% mutate(xy_bin_id = seq_len(nrow(.))) + if(!group) df$.colorLevels <- 0L + obs_df <- tibble( .ci = df$.ci, .ri = df$.ri, .x_bin_id = as.numeric(binned_x), .y_bin_id = as.numeric(binned_y), - .xy_bin_id = as.numeric(binned_2d) + .xy_bin_id = as.numeric(binned_2d), + .colorLevels = df$.colorLevels ) %>% ctx$addNamespace() + x_df <- left_join( x_df, obs_df %>% - group_by(.x_bin_id) %>% + group_by(.x_bin_id, .colorLevels) %>% summarise(x_count = as.numeric(n())) %>% mutate(x_prop = x_count / sum(x_count)), by = c("x_bin_id" = ".x_bin_id") ) %>% ctx$addNamespace() + y_df <- left_join( y_df, obs_df %>% - group_by(.y_bin_id) %>% + group_by(.y_bin_id, .colorLevels) %>% summarise(y_count = as.numeric(n())) %>% mutate(y_prop = y_count / sum(y_count)), by = c("y_bin_id" = ".y_bin_id") ) %>% ctx$addNamespace() + xy_df <- left_join( xy_df, obs_df %>% - group_by(.xy_bin_id) %>% + group_by(.xy_bin_id, .colorLevels) %>% summarise(xy_count = as.numeric(n())) %>% mutate(xy_prop = xy_count / sum(xy_count)), by = c("xy_bin_id" = ".xy_bin_id") ) %>% ctx$addNamespace() - # Return the 2D binned data + + if(!group) { + obs_df <- select(obs_df, -.colorLevels) + x_df <- select(x_df, -.colorLevels) + y_df <- select(y_df, -.colorLevels) + xy_df <- select(xy_df, -.colorLevels) + } else { + x_df <- rename(x_df, .x_colorLevels = .colorLevels) + y_df <- rename(y_df, .y_colorLevels = .colorLevels) + xy_df <- rename(xy_df, .xy_colorLevels = .colorLevels) + } + return(list(obs_df = obs_df, x_df = x_df, y_df = y_df, xy_df = xy_df)) } @@ -98,19 +108,39 @@ n_bins <- ctx$op.value("n_bins", as.double, 10) if(ctx$hasNumericXAxis) { to_select <- c(".y", ".x", ".ci", ".ri") - out_list <- ctx$select(to_select) %>% - bin_data_2d(method = method, n_bins_x = n_bins, n_bins_y = n_bins) - - out_rel <- out_list$obs_df %>% - as_relation() %>% - left_join_relation(ctx$crelation, ".ci", ctx$crelation$rids) %>% - left_join_relation(ctx$rrelation, ".ri", ctx$rrelation$rids) %>% - left_join_relation(as_relation(out_list$x_df), ".x_bin_id", paste0(ctx$namespace, ".x_bin_id")) %>% - left_join_relation(as_relation(out_list$y_df), ".y_bin_id", paste0(ctx$namespace, ".y_bin_id")) %>% - left_join_relation(as_relation(out_list$xy_df), ".xy_bin_id", paste0(ctx$namespace, ".xy_bin_id")) %>% - as_join_operator(c(ctx$cnames, ctx$rnames), c(ctx$cnames, ctx$rnames)) + if(length(ctx$colors) > 0) { + + out_list <- ctx$select(c(to_select, ".colorLevels")) %>% + bin_data_2d(method = method, n_bins_x = n_bins, n_bins_y = n_bins, group = TRUE) + + out_rel <- out_list$obs_df %>% + as_relation() %>% + left_join_relation(ctx$crelation, ".ci", ctx$crelation$rids) %>% + left_join_relation(ctx$rrelation, ".ri", ctx$rrelation$rids) %>% + left_join_relation(as_relation(ctx$schema), ".colorLevels", ".colorLevels") %>% + left_join_relation(as_relation(out_list$x_df), list(".colorLevels", ".x_bin_id"), list(".x_colorLevels", paste0(ctx$namespace, ".x_bin_id"))) %>% + left_join_relation(as_relation(out_list$y_df), list(".colorLevels", ".y_bin_id"), list(".y_colorLevels", paste0(ctx$namespace, ".y_bin_id"))) %>% + left_join_relation(as_relation(out_list$xy_df), list(".colorLevels", ".xy_bin_id"), list(".xy_colorLevels", paste0(ctx$namespace, ".xy_bin_id"))) %>% + as_join_operator(c(ctx$cnames, ctx$rnames, ctx$colors), c(ctx$cnames, ctx$rnames, ctx$colors)) + } else { + + out_list <- ctx$select(to_select) %>% + bin_data_2d(method = method, n_bins_x = n_bins, n_bins_y = n_bins) + + out_rel <- out_list$obs_df %>% + as_relation() %>% + left_join_relation(ctx$crelation, ".ci", ctx$crelation$rids) %>% + left_join_relation(ctx$rrelation, ".ri", ctx$rrelation$rids) %>% + left_join_relation(as_relation(out_list$x_df), ".x_bin_id", paste0(ctx$namespace, ".x_bin_id")) %>% + left_join_relation(as_relation(out_list$y_df), ".y_bin_id", paste0(ctx$namespace, ".y_bin_id")) %>% + left_join_relation(as_relation(out_list$xy_df), ".xy_bin_id", paste0(ctx$namespace, ".xy_bin_id")) %>% + as_join_operator(c(ctx$cnames, ctx$rnames), c(ctx$cnames, ctx$rnames)) + + } + save_relation(out_rel, ctx) + } else { to_select <- c(".y", ".ci", ".ri")