Skip to content

Commit

Permalink
optimise spectral metrics using data.table
Browse files Browse the repository at this point in the history
  • Loading branch information
adelegem committed Oct 30, 2024
1 parent 8270266 commit 8ad186b
Show file tree
Hide file tree
Showing 3 changed files with 109 additions and 129 deletions.
224 changes: 100 additions & 124 deletions R/calculate_spectral_metrics.R
Original file line number Diff line number Diff line change
@@ -1,85 +1,75 @@
#' @title Calculate spectral metrics
#' @description Calculates continuous spectral metrics (CV, SV, CHV) based on input pixel value df
#' @param df description
#' @return
#' @examples
#' @export
#' @import data.table
#' @import geometry
#' @import vegan
#' @import tidyverse

## CV, CHV, SV metric functions appropriated from https://github.com/ALCrofts/CABO_SVH_Forest_Sites/tree/v1.0

# cv function with rarefaction option
calculate_cv <- function(pixel_values_df, # Dataframe with spectral reflectance values
aoi_id, # What you want to calculate spectral diversity for
wavelengths, # Cols where spectral reflectance values are
rarefaction = F,
n = NULL, # Number of random resampling events, if rarefraction = T.
min_points = NULL # minimum number of pixels (ie. the min # of pixels in any subplot)
){
# validate inputs if rarefaction = T
if (rarefaction == T) {
if (is.null(n) || is.null(min_points)) {
stop("Please provide both 'n' and 'min_points' when rarefaction is TRUE.")
}

# convert to datatable (more efficient performance)
setDT(spectral_df)

# create a list to store CV values for each replication
#rarefraction cv function
calculate_cv <- function(pixel_values_df,
subplots = 'subplot_id',
wavelengths,
rarefaction = TRUE,
min_points = NULL,
n = 999) {

# Convert the dataframe to a data.table for efficiency
setDT(pixel_values_df)

if (rarefaction) {
# Initialize a list to store CV values for each replication
cv_list <- vector("list", n)

# b) calculate CV for each resampling event
# Resample and calculate CV for each iteration
for (i in seq_len(n)) {
# sample to the minimum number of points per subplot
sampled_df <- spectral_df[, .SD[sample(.N, min_points)], by = aoi_id, .SDcols = wavelengths]
# Sample to the minimum number of points per subplot
sampled_df <- pixel_values_df[, .SD[sample(.N, min_points)], by = subplots, .SDcols = wavelengths]

# calculate CV for each wavelength within each subplot
cv_data <- sampled_df[, lapply(.SD, function(x) sd(x) / abs(mean(x, na.rm = TRUE))), by = aoi_id]
# Calculate CV for each wavelength within each subplot
cv_data <- sampled_df[, lapply(.SD, function(x) sd(x) / abs(mean(x, na.rm = TRUE))), by = subplots]

# sum across wavelengths and normalize by the number of bands (ignoring NAs)
# Sum across wavelengths and normalize by the number of bands (ignoring NAs)
cv_data[, CV := rowSums(.SD, na.rm = TRUE) / (length(wavelengths) - rowSums(is.na(.SD))), .SDcols = wavelengths]

# store cv values
cv_list[[i]] <- cv_data[, .(CV), by = aoi_id]
# Store CV values along with subplots
cv_list[[i]] <- cv_data[, .(subplot_id = subplots, CV), by = subplots] # Ensure subplot_id is included
}

# c) Collapse list of CV data tables into a single data table and calculate average CV for each area of interest
CV <- rbindlist(cv_list)[, .(CV = mean(CV, na.rm = TRUE)), by = areas_of_interest]
# Collapse the list of CV data tables into a single data table and calculate the average CV for each subplot
cv <- rbindlist(cv_list)[, .(CV = mean(CV, na.rm = TRUE)), by = subplots]

return(CV)
}
if (rarefaction == F){
pixel_values_df %>%
select(c({{aoi_id}}, {{wavelengths}})) %>%
group_by({{aoi_id}}) %>%
summarise_all(~sd(.)/abs(mean(.))) %>%
rowwise({{aoi_id}}) %>%
summarise(CV = sum(c_across(cols = everything()), na.rm = T) / (ncol(.) - sum(is.na(c_across(everything())))))
} else {
# If rarefaction is FALSE, directly calculate the CV without resampling
cv <- pixel_values_df[, lapply(.SD, function(x) sd(x) / abs(mean(x, na.rm = TRUE))), by = subplots, .SDcols = wavelengths]

return(cv)
}
# Sum across wavelengths and normalize by the number of bands (ignoring NAs)
cv[, CV := rowSums(.SD, na.rm = TRUE) / (length(wavelengths) - rowSums(is.na(.SD))), .SDcols = wavelengths]

# Ensure subplot_id is included in the output
cv <- cv[, .(subplot_id = subplots, CV)]
}

return(cv)
}



# sv function
calculate_sv <- function(pixel_values_df, aoi_id, wavelengths) {
spectral_points <- pixel_values_df %>%
group_by({{aoi_id}}) %>%
summarise(points = n())

sv <- pixel_values_df %>%
select(c({{wavelengths}}, {{aoi_id}})) %>%
group_by({{aoi_id}}) %>%
summarise_all(~sum((.x - mean(.x))^2)) %>%
rowwise({{aoi_id}}) %>%
summarise(SS = sum(c_across(cols = everything()))) %>%
left_join(spectral_points) %>%
summarise(SV = SS / (points - 1))

return(sv)
calculate_sv <- function(pixel_values_df, subplots = 'subplot_id', wavelengths) {
# Convert pixel_values_df to data.table for better performance
setDT(pixel_values_df)

# Calculate the number of points per subplot
spectral_points <- pixel_values_df[, .(points = .N), by = subplots]

# Calculate spectral variance (SV)
sv <- pixel_values_df[, lapply(.SD, function(x) sum((x - mean(x, na.rm = TRUE))^2)),
.SDcols = wavelengths,
by = subplots]

# Sum across wavelengths
sv[, SS := rowSums(.SD), .SDcols = wavelengths]

# Join with the spectral points
sv <- sv[spectral_points, on = subplots]

# Calculate SV
sv[, SV := SS / (points - 1)]

return(sv[, .(subplot_id = get(subplots), SV)])
}

# chv function
Expand All @@ -96,96 +86,82 @@ calculate_chv <- function(df, dim) {
}

# function to calculate chv for each subplot
calculate_chv_for_aoi <- function(df, wavelengths, dim = 3, aoi_id = 'aoi_id', rarefraction = TRUE, n = 999) {
results <- tibble(aoi_id = character(), CHV = double())

# Perform PCA for specified wavelengths
PCA <- df %>%
select(all_of(wavelengths)) %>%
vegan::rda(scale = FALSE)
calculate_chv_nopca <- function(df,
wavelengths,
subplots = 'subplot_id',
rarefaction = TRUE,
min_points = NULL,
n = 999) {

# Add subplot id as column to PCA df
pca_results <- data.frame(PCA$CA$u) %>%
bind_cols(aoi_id = df[[aoi_id]])
# Convert df to data.table for better performance
setDT(df)

# Compute the minimum number of points across all subplots
min_points <- pca_results %>%
group_by(aoi_id) %>%
summarise(points = n()) %>%
summarise(min_points = min(points)) %>%
pull(min_points)
results <- data.table(subplot_id = character(), CHV_nopca = double())

# Loop through each subplot
for (aoi in unique(df[[aoi_id]])) {
for (subplot in unique(df[[subplots]])) {
# Subset data for current subplot
subplot_sample <- pca_results %>%
filter(aoi_id == aoi)
subplot_sample <- df[get(subplots) == subplot, ..wavelengths]

if (rarefraction) {
if (rarefaction) {
# Resample CHV n times and calculate the mean
chv_values <- replicate(n, {
resampled <- aoi_sample %>%
select(-aoi_id) %>%
sample_n(min_points, replace = FALSE)
resampled <- subplot_sample[sample(.N, min_points, replace = FALSE)]

# Convert to matrix for convex hull calculation
CHV_matrix <- as.matrix(resampled)

chv_out <- calculate_chv(resampled, dim = dim)
# Calculate CHV
chv_out <- geometry::convhulln(CHV_matrix, option = "FA")
return(chv_out$vol)
})

mean_chv <- mean(chv_values)
} else {
# calculate CHV without resampling
chv_out <- calculate_chv(aoi_sample, dim = dim)
# Calculate CHV without resampling
CHV_matrix <- as.matrix(subplot_sample)
chv_out <- geometry::convhulln(CHV_matrix, option = "FA")
mean_chv <- chv_out$vol
}

# store results
results <- results %>%
add_row(aoi_id = aoi, CHV = mean_chv)
# Store results in data.table
results <- rbind(results, data.table(subplot_id = subplot, CHV_nopca = mean_chv), fill = TRUE)
}

return(results)
}


## FUNCTION FOR CALCULATING ALL METRICS

calculate_spectral_metrics <- function(pixel_values_df, masked = TRUE, wavelengths) {
calculate_spectral_metrics <- function(pixel_values_df,
masked = TRUE,
wavelengths,
min_points,
n = 999,
rarefaction = TRUE) { # Add rarefaction here
results <- list()

# loop through each site (represented as 'identifier' from file name)
for (site_name in unique(pixel_values_df$site_name)) {

# filter pixel values for the current site
site_pixel_values <- pixel_values_df %>% filter(site_name == !!site_name)
for (identifier in unique(pixel_values_df$identifier)) {
site_pixel_values <- pixel_values_df %>% filter(identifier == !!identifier)

# calculate metrics (CV, SV, CHV)
cv <- calculate_cv(site_pixel_values, aoi_id, wavelengths)
sv <- calculate_sv(site_pixel_values, aoi_id, wavelengths)
chv <- calculate_chv_for_aoi(site_pixel_values, wavelengths)
# Calculate metrics, pass rarefaction where needed
cv <- calculate_cv(site_pixel_values, wavelengths = wavelengths, rarefaction = rarefaction, n = n, min_points = min_points)
sv <- calculate_sv(site_pixel_values, wavelengths = wavelengths)
chv <- calculate_chv_nopca(site_pixel_values, wavelengths, rarefaction = rarefaction, min_points = min_points)

# store results
results[[site_name]] <- list(CV = cv, SV = sv, CHV = chv, CHV_nopca = chv_nopca)
results[[identifier]] <- list(CV = cv, SV = sv, CHV = chv)
}

# combine metrics into data frames
combined_cv <- bind_rows(lapply(results, function(x) x$CV), .id = 'site_name')
combined_sv <- bind_rows(lapply(results, function(x) x$SV), .id = 'site_name')
combined_chv <- bind_rows(lapply(results, function(x) x$CHV), .id = 'site_name')
combined_cv <- bind_rows(lapply(results, function(x) x$CV), .id = 'identifier')
combined_sv <- bind_rows(lapply(results, function(x) x$SV), .id = 'identifier')
combined_chv <- bind_rows(lapply(results, function(x) x$CHV), .id = 'identifier')

# create a data frame for combined metrics
combined_metrics <- combined_cv %>%
left_join(combined_sv, by = c("site_name", "aoi_id")) %>%
left_join(combined_chv, by = c("site_name", "aoi_id"))
left_join(combined_sv, by = c("identifier", "subplot_id")) %>%
left_join(combined_chv, by = c("identifier", "subplot_id"))

# add image_type column based on masked argument
if (masked) {
combined_metrics <- combined_metrics %>%
mutate(image_type = 'masked')
} else {
combined_metrics <- combined_metrics %>%
mutate(image_type = 'unmasked')
}
combined_metrics <- combined_metrics %>%
mutate(image_type = ifelse(masked, 'masked', 'unmasked'))

return(combined_metrics)
}
13 changes: 9 additions & 4 deletions R/create_masked_raster.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@
#'NIR_band_index = 5)
#' @export
#' @import terra
#' @import raster
#' @import tools
#' @import stringr

# CREATE_MASKED_RASTER FUNCTION
#input can be directory with a number of files, a single file, or string of files.
Expand Down Expand Up @@ -51,7 +51,7 @@ create_masked_raster <- function(input, output_dir,

for (file in files) {
# extract the site identifier from file name
file_name <- strsplit(basename(file))
file_name <- basename(file)

if (!is.null(ndvi_threshold_df)) {

Expand All @@ -60,7 +60,8 @@ create_masked_raster <- function(input, output_dir,


ndvi_threshold <- ndvi_threshold_df$threshold[ndvi_threshold_df$site == site_id]
} else {
}
if (length(ndvi_threshold) == 0 ) {
stop(paste("No NDVI threshold values found for file", file_name))
}

Expand All @@ -71,7 +72,8 @@ create_masked_raster <- function(input, output_dir,
site_id <- nir_threshold_df$site[grepl(nir_threshold_df$site, file_name)]

nir_threshold <- nir_threshold_df$threshold[nir_threshold_df$site == site_id]
} else {
}
if (length(nir_threshold) == 0) {
stop(paste("No NIR threshold values found for file", file_name))
}

Expand Down Expand Up @@ -99,3 +101,6 @@ create_masked_raster <- function(input, output_dir,
}
}

library(tools)
create_masked_raster('C:/Users/adele/Documents/fg_spectral_diversity/data_out/combined_rasters/2024/NSABHC0009_combined_image.tif', 'test', ndvi_threshold = 0.02, nir_threshold = 0.04)

1 change: 0 additions & 1 deletion R/find_optimum_thresholds.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
#' @import pROC
#' @import dplyr


# add a for loop so nir and ndvi ground truth values can be given in the same df instead of seperately :)
# FIND OPTIMUM THRESHOLDS FUNCTION

Expand Down

0 comments on commit 8ad186b

Please sign in to comment.