Skip to content

Commit

Permalink
Making ir_bin() faster by improving spectra summarizing per bin. Al…
Browse files Browse the repository at this point in the history
…lowing to specify how wavenumber values should be defined after binning.
  • Loading branch information
henningte committed Jun 2, 2022
1 parent 0f37eb6 commit 92a503c
Show file tree
Hide file tree
Showing 3 changed files with 101 additions and 29 deletions.
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
## Improvements

* Making `ir_flatten()` faster by improving spectra combining.
* Making `ir_bin()` faster by improving spectra summarizing per bin. Allowing to specify how wavenumber values should be defined after binning.

# ir 0.2.1

Expand Down
86 changes: 65 additions & 21 deletions R/ir_bin.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,27 +3,53 @@
#' `ir_bin` bins intensity values of infrared spectra into bins of a
#' defined width or into a defined number of bins.
#'
#' If the last bin contains fewer input values than the remaining bins, it
#' will be dropped and a warning will be printed. If a wavenumber value exactly
#' matches the boundary of a bin window, the respective intensity value will be
#' assigned to both neighboring bins.
#' If a wavenumber value exactly matches the boundary of a bin window, the
#' respective intensity value will be assigned to both neighboring bins.
#'
#' @param x An object of class [`ir`][ir_new_ir()] with integer wavenumber
#' values increasing by 1.
#'
#' @param width An integer value indicating the wavenumber width of each
#' resulting bin. Must be set to `NULL` if `n` is specified.
#' resulting bin.
#'
#' @param new_x_type A character value denoting how new wavenumber values for
#' the computed bins should be stored in the spectra of `x` after binning. Must
#' be one of:
#' \describe{
#' \item{`"start"`}{New wavenumbers for binned intensities are the start
#' wavenumber value which defines the start of each bin. The default
#' (for historical reasons).}
#' \item{`"mean"`}{New wavenumbers for binned intensities are the average
#' of the start and end wavenumber values which define the start and end of
#' each bin.}
#' \item{`"end"`}{New wavenumbers for binned intensities are the end
#' wavenumber value which defines the end of each bin.}
#' }
#'
#' @return An object of class `ir` where spectra have been binned.
#'
#' @examples
#' x <-
#' # new wavenumber values are the first wavenumber value for each bin
#' x1 <-
#' ir::ir_sample_data %>%
#' ir_bin(width = 50, new_x_type = "start")
#'
#' # new wavenumber values are the last wavenumber value for each bin
#' x2 <-
#' ir::ir_sample_data %>%
#' ir_bin(width = 50)
#' ir_bin(width = 50, new_x_type = "mean")
#'
#' # new wavenumber values are the average of the wavenumber values assigned to
#' # each bin
#' x3 <-
#' ir::ir_sample_data %>%
#' ir_bin(width = 50, new_x_type = "end")
#'
#' # compare wavenumber values for first spectra.
#' cbind(x1$spectra[[1]]$x, x2$spectra[[1]]$x, x3$spectra[[1]]$x)
#'
#' @export
ir_bin <- function(x,
width = 10) {
ir_bin <- function(x, width = 10, new_x_type = "start") {

# checks
ir_check_ir(x)
Expand All @@ -36,6 +62,9 @@ ir_bin <- function(x,
if(width %% 1 != 0) {
stop("width must be an integer.")
}
if(length(new_x_type) != 1 || !is.character(new_x_type)) {
stop("`new_x_type` must be a character value and one of 'start', 'mean', 'end'.")
}

x_flat <- ir_flatten(x)

Expand All @@ -50,10 +79,18 @@ ir_bin <- function(x,
# avoid overlapping bins
index_overlaps <- bins_wn$end[-nrow(bins_wn)] - bins_wn$start[-1]
bins_index <-
purrr::map2_df(bins_wn$start, bins_wn$end, function(x, y){
purrr::map_df(seq_len(nrow(bins_wn)), function(i) {
.x <- bins_wn$start[[i]]
.y <- bins_wn$end[[i]]

tibble::tibble(
start = which(x_flat$x >= x)[[1]],
end = rev(which(x_flat$x <= y))[[1]]
index_bin = i,
start = which(x_flat$x >= .x)[[1]],
end = rev(which(x_flat$x <= .y))[[1]],
wn_start = .x,
wn_end = .y,
wn_mean = mean(c(.x, .y)),
index_rows_x_flat = .data$start:.data$end
)
})

Expand All @@ -63,17 +100,24 @@ ir_bin <- function(x,
rlang::warn(paste0("Dropping the last ", n_drop, " values of `x` during binning."))
}

# prepare x_flat for binning
bins_index <- tidyr::unnest(bins_index, cols = .data$index_rows_x_flat)
x_flat <- x_flat[bins_index$index_rows_x_flat, ]
x_flat$index_bin <- bins_index$index_bin
x_flat$x <-
switch(
new_x_type,
"start" = bins_index$wn_start,
"mean" = bins_index$wn_mean,
"end" = bins_index$wn_end
)

# perform binning
x_binned <-
purrr::map_df(seq_len(nrow(bins_index)), function(i) {
dplyr::summarise_all(x_flat[bins_index[i, 1, drop = TRUE]:bins_index[i, 2, drop = TRUE], -1], mean)
})
colnames(x_binned) <- as.character(seq_len(nrow(x)))
x_binned_wn <-
purrr::map_dbl(seq_len(nrow(bins_wn)), function(i) {
mean(bins_wn[i, 1, drop = TRUE], bins_wn[i, 2, drop = TRUE])
})
x_binned <- dplyr::bind_cols(x = x_binned_wn, x_binned)
x_flat %>%
dplyr::group_by(.data$index_bin) %>%
dplyr::summarise_all(mean) %>%
dplyr::select(!dplyr::any_of("index_bin"))

x$spectra <- ir_stack(x_binned)$spectra
x
Expand Down
43 changes: 35 additions & 8 deletions man/ir_bin.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 92a503c

Please sign in to comment.