generated from ecohealthalliance/container-template
-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
12 changed files
with
3,162 additions
and
2,069 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,30 +1,51 @@ | ||
lag_data <- function(data_files, | ||
lag_intervals, | ||
model_dates_selected, | ||
lagged_data_directory, | ||
basename_template = "lagged_data_{model_dates_selected}.gz.parquet", | ||
overwrite = TRUE, | ||
...) { | ||
|
||
# Check that we're only working on one date at a time | ||
stopifnot(length(model_dates_selected) == 1) | ||
|
||
# Set filename | ||
save_filename <- file.path(lagged_data_directory, glue::glue(basename_template)) | ||
|
||
# Check if file already exists and can be read | ||
error_safe_read_parquet <- possibly(arrow::open_dataset, NULL) | ||
|
||
if(!is.null(error_safe_read_parquet(save_filename)) & !overwrite) { | ||
message("file already exists and can be loaded, skipping download") | ||
return(save_filename) | ||
} | ||
|
||
# The goal of this is to figure out the average of the data column over the interval | ||
# Find dates at start and end interval back from date | ||
# Group by x, y, start_interval, end_interval, and take the mean don't forget na.rm = T | ||
message(glue::glue("calculating lagged data for {dirname(data_files[1])} starting from {model_dates_selected}")) | ||
|
||
data <- arrow::open_dataset(data_files) | ||
lagged_data <- map2_dfr(tail(lag_intervals,-1), head(lag_intervals,-1), function(lag_interval_start, lag_interval_end) { | ||
|
||
lagged_data <- map_dfr(1:(length(lag_intervals) - 1), function(i) { | ||
|
||
start_date = model_dates_selected - days(lag_intervals[i]) | ||
end_date = model_dates_selected - days(lag_intervals[i+1] - 1) | ||
start_date = model_dates_selected + days(lag_interval_start) # start, i.e. 30 days prior. | ||
end_date = model_dates_selected + days(lag_interval_end) # end, i.e. 0 days prior. | ||
message(glue::glue("lag_interval range ({lag_interval_start}, {lag_interval_end}]: ({start_date}, {end_date}]")) | ||
|
||
data |> filter(date >= end_date, date <= start_date) |> | ||
# Note: lags go back in time so the inequality symbols are reversed. Also | ||
# date > start_date makes the range _exclusive_ (start_date, end_date] to avoid | ||
# duplication problems. | ||
arrow::open_dataset(data_files) |> | ||
filter(date > start_date, date <= end_date) |> | ||
collect() |> | ||
select(-source) |> | ||
group_by(x, y, date, doy, month, year) |> | ||
summarize(across(everything(), ~mean(.x, na.rm = T))) |> | ||
mutate(lag_interval = lag_intervals[i]) | ||
summarize(across(everything(), ~mean(.x, na.rm = T)), .groups = "drop") |> | ||
mutate(lag_interval_start = lag_interval_start, | ||
lag_interval_end = lag_interval_end) | ||
|
||
select(-doy, -month, -year) |> mutate(date = dplyr::lag(date, lag_interval)) |> rename_with(~ paste(.x, "lag", lag_interval, sep = "_"), contains("ndvi")) |> drop_na(date) | ||
}) |> reduce(left_join, by = c("x", "y", "date")) |> | ||
drop_na() |> | ||
rename_with(~gsub("_0", "", .x), contains("_0")) | ||
}) | ||
|
||
arrow::write_parquet(lagged_data, save_filename, compression = "gzip", compression_level = 5) | ||
|
||
save_filename | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.