Skip to content

Commit

Permalink
finish forecast anomaly regeneration
Browse files Browse the repository at this point in the history
  • Loading branch information
emmamendelsohn committed Dec 11, 2023
1 parent f7dd0cd commit f252bc5
Show file tree
Hide file tree
Showing 5 changed files with 628 additions and 612 deletions.
35 changes: 28 additions & 7 deletions R/augment_data.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

#' @title Join weather, forecast, and NDVI anomalies into one dataset partitioned by date
#' @param weather_anomalies
#' @param forecasts_anomalies
Expand All @@ -10,16 +9,38 @@
augment_data <- function(weather_anomalies, forecasts_anomalies,
ndvi_anomalies, augmented_data_directory) {


message("Load datasets into memory")
weather <- arrow::open_dataset(weather_anomalies) |> dplyr::collect()
forecasts <- arrow::open_dataset(forecasts_anomalies) |> dplyr::collect()
ndvi <- arrow::open_dataset(ndvi_anomalies) |> dplyr::collect()

message("NA checks")
## Weather and forecasts
### NAs are in scaled precip data, due to days with 0 precip
weather_check <- purrr::map_lgl(weather, ~any(is.na(.)))
assertthat::assert_that(all(str_detect(names(weather_check[weather_check]), "scaled")))

weather <- arrow::open_dataset(weather_anomalies)
forecasts <- arrow::open_dataset(forecasts_anomalies)
ndvi <- arrow::open_dataset(ndvi_anomalies)
forecasts_check <- purrr::map_lgl(forecasts, ~any(is.na(.)))
assertthat::assert_that(all(str_detect(names(forecasts_check[forecasts_check]), "scaled")))

## NDVI
### Prior to 2018: NAs are due to region missing from Eastern Africa in modis data
### After 2018: NAs are due to smaller pockets of missing data on a per-cycle basis
### okay to remove when developing RSA model (issue #72)
ndvi_check <- purrr::map_lgl(ndvi, ~any(is.na(.)))
assertthat::assert_that(!any(ndvi_check[c("date", "x", "y")]))
ndvi <- drop_na(ndvi)

message("Join into a single object")
augmented_data <- left_join(weather, forecasts, by = join_by(date, x, y)) |>
left_join(ndvi, by = join_by(date, x, y))

left_join(weather, forecasts) |>
left_join(ndvi) |>
message("Save as parquets using hive partitioning by date")
augmented_data |>
group_by(date) |>
write_dataset(augmented_data_directory)

return(list.files(augmented_data_directory))

}
}
6 changes: 0 additions & 6 deletions R/calculate_forecasts_anomalies.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,7 @@ calculate_forecasts_anomalies <- function(ecmwf_forecasts_transformed,
existing_files <- list.files(forecasts_anomalies_directory)
if(save_filename %in% existing_files & !overwrite) {
message("file already exists, skipping download")
saved <- read_parquet(file.path(forecasts_anomalies_directory, save_filename))
saved_check <- col_na(saved)
if(!any(saved_check)) {
return(file.path(forecasts_anomalies_directory, save_filename))
}else{
message("NAs found, regenerating file")
}
}

# Open dataset to transformed data
Expand Down
2 changes: 1 addition & 1 deletion _targets.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ source("_targets_settings.R")
# For development purposes only: it can be helpful to give these targets a cue of tar_cue_upload_aws. When that cue is set to "never", targets will not check the target for changes after it has been built once.

tar_cue_general = "thorough" # CAUTION changing this to never means targets can miss changes to the code. Use only for developing.
tar_cue_upload_aws = "never" # CAUTION changing this to never means targets can miss changes to the code. Use only for developing.
tar_cue_upload_aws = "thorough" # CAUTION changing this to never means targets can miss changes to the code. Use only for developing.

# Static Data Download ----------------------------------------------------
static_targets <- tar_plan(
Expand Down
1,191 changes: 596 additions & 595 deletions _targets/meta/meta

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions renv.lock
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"R": {
"Version": "4.3.1",
"Version": "4.3.2",
"Repositories": [
{
"Name": "RSPM",
Expand Down Expand Up @@ -1494,7 +1494,7 @@
},
"lattice": {
"Package": "lattice",
"Version": "0.22-5",
"Version": "0.21-9",
"Source": "Repository",
"Repository": "CRAN",
"Requirements": [
Expand All @@ -1505,7 +1505,7 @@
"stats",
"utils"
],
"Hash": "7c5e89f04e72d6611c77451f6331a091"
"Hash": "5558c61e0136e247252f5f952cdaad6a"
},
"lifecycle": {
"Package": "lifecycle",
Expand Down

0 comments on commit f252bc5

Please sign in to comment.