Commit f70de3e

More work integrating AWS

n8layman committed Oct 4, 2024
1 parent 813a319
Showing 26 changed files with 705 additions and 342 deletions.
Binary file modified .env
2 changes: 2 additions & 0 deletions R/AWS_get_folder.R
@@ -131,6 +131,8 @@ AWS_put_files <- function(transformed_file_list,
                           local_folder,
                           ...) {
 
+  transformed_file_list <- basename(transformed_file_list)
+
   # Check if AWS credentials and region are set in the environment
   if (any(Sys.getenv(c("AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", "AWS_REGION")) == "")) {
     msg <- paste(
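The added basename() call lets callers pass full local paths, with the S3 keys then resolved relative to local_folder. Below is a minimal standalone sketch of the same fail-fast credential check; the variable names mirror the diff, but the stop() message is illustrative and not from the commit.

# Sketch: fail fast when AWS environment variables are missing (assumed context).
required_vars <- c("AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", "AWS_REGION")
missing_vars <- required_vars[Sys.getenv(required_vars) == ""]
if (length(missing_vars) > 0) {
  stop("Missing AWS environment variables: ", paste(missing_vars, collapse = ", "))
}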
4 changes: 2 additions & 2 deletions R/create_country_polygons.R
@@ -9,7 +9,7 @@
 #' @export
 create_country_polygons <- function(countries, states) {
 
-  country_polygons <- ne_countries(country = countries, returnclass = "sf")
+  country_polygons <- rnaturalearth::ne_countries(country = countries, returnclass = "sf")
   assertthat::assert_that(nrow(country_polygons) == length(countries))
   country_polygons <- country_polygons |>
     select(featurecla, country = name, country_iso3c = sov_a3)
@@ -18,7 +18,7 @@ create_country_polygons <- function(countries, states) {
     rowwise() |>
     group_split() |>
     map_dfr(function(x){
-      ne_states(country = x$country, returnclass = "sf") |>
+      rnaturalearth::ne_states(country = x$country, returnclass = "sf") |>
         filter(name == x$state)
     })
   assertthat::assert_that(nrow(states) == nrow(state_polygons))
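Namespace-qualifying ne_countries() and ne_states() as rnaturalearth:: calls avoids relying on the package being attached. A hypothetical call follows, assuming states is a data frame with country and state columns, as the map_dfr() body implies; the place names are purely illustrative.

# Hypothetical usage; the country and state values are illustrative.
states <- data.frame(country = c("Kenya", "Tanzania"),
                     state   = c("Nairobi", "Dodoma"))
polygons <- create_country_polygons(countries = c("Kenya", "Tanzania"),
                                    states = states)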
31 changes: 22 additions & 9 deletions R/get_bioclim_data.R
@@ -10,12 +10,27 @@
 #' @examples
 get_bioclim_data <- function(output_dir,
                              output_filename,
-                             raster_template) {
+                             continent_raster_template,
+                             overwrite = FALSE,
+                             ...) {
 
   # Create directory if it does not yet exist
   dir.create(output_dir, recursive = TRUE, showWarnings = FALSE)
 
-  template <- terra::unwrap(raster_template)
+  template <- terra::unwrap(continent_raster_template)
+
+  # Set up safe way to read parquet files
+  error_safe_read_parquet <- possibly(arrow::read_parquet, NULL)
+
+  # Bioclim filename
+  bioclim_filename <- file.path(output_dir, output_filename)
+
+  # Check if the bioclim file already exists and can be read, and that we don't want to overwrite it.
+  if(!is.null(error_safe_read_parquet(bioclim_filename)) && !overwrite) {
+    message("preprocessed bioclim parquet file already exists and can be loaded, skipping download and processing")
+    return(bioclim_filename)
+  }
 
   bioclim_data <- geodata::worldclim_global(var = "bio", res = 2.5, path = output_dir)
 
   bioclim_data <- transform_raster(bioclim_data, template)
@@ -34,22 +49,20 @@ get_bioclim_data <- function(output_dir,
   # Assign the new names to the layers
   names(bioclim_data) <- bioclim_names
 
-  filename = paste(output_dir, output_filename, sep = "/")
-
-  if(grepl("\\.parquet", filename)) {
+  if(grepl("\\.parquet", bioclim_filename)) {
     # Convert to dataframe
     dat <- as.data.frame(bioclim_data, xy = TRUE) |> as_tibble()
 
     # Save as parquet
-    arrow::write_parquet(dat, filename, compression = "gzip", compression_level = 5)
+    arrow::write_parquet(dat, bioclim_filename, compression = "gzip", compression_level = 5)
 
-    terra::writeRaster(bioclim_data, filename=gsub("parquet", "tif", filename), overwrite=T, gdal=c("COMPRESS=LZW"))
+    terra::writeRaster(bioclim_data, filename=gsub("parquet", "tif", bioclim_filename), overwrite=T, gdal=c("COMPRESS=LZW"))
 
   } else {
-    terra::writeRaster(bioclim_data, filename=filename, overwrite=T, gdal=c("COMPRESS=LZW"))
+    terra::writeRaster(bioclim_data, filename=bioclim_filename, overwrite=T, gdal=c("COMPRESS=LZW"))
   }
 
   unlink(paste(output_dir, "climate", sep = "/"), recursive=TRUE)
 
-  return(filename)
+  return(bioclim_filename)
 }
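The skip-if-cached guard added here hinges on purrr::possibly(), which wraps arrow::read_parquet() so that a missing or unreadable file yields NULL instead of an error. A minimal sketch of the pattern in isolation; the file path is illustrative.

library(purrr)
# Returns NULL instead of throwing if the file is absent or corrupt.
error_safe_read_parquet <- possibly(arrow::read_parquet, otherwise = NULL)
cached <- error_safe_read_parquet("data/bioclim.gz.parquet")
if (!is.null(cached)) message("cache hit: skipping download")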
43 changes: 27 additions & 16 deletions R/get_elevation_data.R
@@ -10,37 +10,48 @@
 #' @examples
 get_elevation_data <- function(output_dir,
                                output_filename,
-                               raster_template) {
+                               continent_raster_template,
+                               overwrite = FALSE,
+                               ...) {
 
   # Create directory if it does not yet exist
   dir.create(output_dir, recursive = TRUE, showWarnings = FALSE)
 
-  template <- terra::unwrap(raster_template)
+  # Unwrap the raster template
+  continent_raster_template <- terra::unwrap(continent_raster_template)
+
+  # Set up safe way to read parquet files
+  error_safe_read_parquet <- possibly(arrow::read_parquet, NULL)
+
+  # Elevation filename
+  elevation_filename <- file.path(output_dir, output_filename)
+
+  # Check if the elevation file already exists and can be read, and that we don't want to overwrite it.
+  if(!is.null(error_safe_read_parquet(elevation_filename)) && !overwrite) {
+    message("preprocessed elevation parquet file already exists and can be loaded, skipping download and processing")
+    return(elevation_filename)
+  }
 
-  # Create a bounding bounding box template
-  elevation_data <- geodata::elevation_global(res = 0.5 ,
-                                              path = output_dir)
-
-  elevation_data <- transform_raster(elevation_data,
-                                     template = template)
-
-  filename = paste(output_dir, output_filename, sep = "/")
+  # Download the global elevation data and transform it to match the template
+  elevation_data <- geodata::elevation_global(res = 0.5, path = output_dir)
+  elevation_data <- transform_raster(elevation_data, template = continent_raster_template)
 
-  if(grepl("\\.parquet", filename)) {
+  if(grepl("\\.parquet", elevation_filename)) {
 
     # Convert to dataframe
     dat <- as.data.frame(elevation_data, xy = TRUE) |> as_tibble()
 
     # Save as parquet
-    arrow::write_parquet(dat, filename, compression = "gzip", compression_level = 5)
-
-    terra::writeRaster(elevation_data, filename=gsub("parquet", "tif", filename), overwrite=T, gdal=c("COMPRESS=LZW"))
+    arrow::write_parquet(dat, elevation_filename, compression = "gzip", compression_level = 5)
+    terra::writeRaster(elevation_data, filename=gsub("parquet", "tif", elevation_filename), overwrite=T, gdal=c("COMPRESS=LZW"))
 
   } else {
-    terra::writeRaster(elevation_data, filename=filename, overwrite=T, gdal=c("COMPRESS=LZW"))
+    terra::writeRaster(elevation_data, filename=elevation_filename, overwrite=T, gdal=c("COMPRESS=LZW"))
   }
 
   # Clean up raw data
   unlink(paste(output_dir, "elevation", sep = "/"), recursive=TRUE)
 
   # Return path to compressed raster
-  return(filename)
+  return(elevation_filename)
 }
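Both output branches depend on flattening the SpatRaster to an x/y table before writing parquet. A self-contained sketch of that round trip; the toy raster and file name are illustrative stand-ins for the downloaded elevation data.

library(terra)
# Toy raster standing in for the downloaded elevation data.
r <- rast(nrows = 10, ncols = 10, vals = runif(100))
# Flatten to a tibble of x, y, and cell values, then write compressed parquet.
dat <- tibble::as_tibble(as.data.frame(r, xy = TRUE))
arrow::write_parquet(dat, "elevation.parquet", compression = "gzip", compression_level = 5)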
4 changes: 1 addition & 3 deletions R/get_glw_data.R
@@ -8,7 +8,7 @@
 #' @author Whitney Bagge
 #' @export
 #'
-get_glw_data <- function(glw_directory_raw) {
+get_glw_data <- function(glw_directory_dataset) {
 
   options(timeout=200)
 
@@ -28,6 +28,4 @@ get_glw_data <- function(glw_directory_raw) {
 
   return(glw_directory_raw)
 
-
-
 }
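The options(timeout=200) in the surrounding context raises base R's download timeout (60 seconds by default) for the large GLW rasters. If that change is meant to be local to the function, the usual convention is to restore the previous value on exit; a sketch of that pattern, which is an assumed refinement and not part of this commit:

# Sketch: scope the timeout change to the calling function (assumed refinement).
old_opts <- options(timeout = 200)
on.exit(options(old_opts), add = TRUE)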
42 changes: 29 additions & 13 deletions R/get_landcover_data.R
@@ -1,39 +1,55 @@
 get_landcover_data <- function(output_dir,
                                output_filename,
-                               raster_template) {
+                               landcover_types,
+                               continent_raster_template,
+                               overwrite = FALSE,
+                               ...) {
 
-  template <- terra::unwrap(raster_template)
+  # Create directory if it does not yet exist
+  dir.create(output_dir, recursive = TRUE, showWarnings = FALSE)
 
-  landcover_types <- c("trees", "grassland", "shrubs", "cropland", "built", "bare", "snow", "water", "wetland", "mangroves", "moss")
+  continent_raster_template <- terra::unwrap(continent_raster_template)
+
+  # Set up safe way to read parquet files
+  error_safe_read_parquet <- possibly(arrow::read_parquet, NULL)
+
+  # Landcover filename
+  landcover_filename <- file.path(output_dir, output_filename)
+
+  # Check if the landcover file already exists and can be read, and that we don't want to overwrite it.
+  if(!is.null(error_safe_read_parquet(landcover_filename)) && !overwrite) {
+    message("preprocessed landcover parquet file already exists and can be loaded, skipping download and processing")
+    return(landcover_filename)
+  }
 
+  # Fetch each layer, process it, and stack the results into a single SpatRaster,
+  # cleaning up files as we go to save space.
   landcover_data <- map(landcover_types, function(l) {
     landcover <- geodata::landcover(var = l, path = output_dir)
-    file <- sources(landcover)
-    landcover <- transform_raster(landcover, template)
-    unlink(file)
+    file <- terra::sources(landcover)
+    landcover <- transform_raster(landcover, continent_raster_template)
+    unlink(file) # Clean up as we go; these files are huge.
     landcover
   })
 
   # Bind into one raster
   landcover_data <- do.call(c, landcover_data)
 
-  filename = paste(output_dir, output_filename, sep = "/")
-
-  if(grepl("\\.parquet", filename)) {
+  if(grepl("\\.parquet", landcover_filename)) {
     # Convert to dataframe
     dat <- as.data.frame(landcover_data, xy = TRUE) |> as_tibble()
 
     # Save as parquet
-    arrow::write_parquet(dat, filename, compression = "gzip", compression_level = 5)
+    arrow::write_parquet(dat, landcover_filename, compression = "gzip", compression_level = 5)
 
-    terra::writeRaster(landcover_data, filename=gsub("parquet", "tif", filename), overwrite=T, gdal=c("COMPRESS=LZW"))
+    terra::writeRaster(landcover_data, filename=gsub("parquet", "tif", landcover_filename), overwrite=T, gdal=c("COMPRESS=LZW"))
 
   } else {
-    terra::writeRaster(landcover_data, filename=filename, overwrite=T, gdal=c("COMPRESS=LZW"))
+    terra::writeRaster(landcover_data, filename=landcover_filename, overwrite=T, gdal=c("COMPRESS=LZW"))
   }
 
   # Clean up raw files which are very large and no longer needed.
   unlink(paste(output_dir, "landuse", sep = "/"), recursive=TRUE)
 
-  return(filename)
+  return(landcover_filename)
 }
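Each landcover type arrives as its own single-layer raster, and do.call(c, ...) concatenates the list into one multi-layer SpatRaster. A small sketch of that stacking step with toy layers standing in for the downloads:

library(terra)
library(purrr)
# Toy stand-ins for the per-type landcover layers.
layers <- map(c("trees", "water"), function(l) {
  x <- rast(nrows = 10, ncols = 10, vals = runif(100))
  names(x) <- l
  x
})
stacked <- do.call(c, layers)  # one SpatRaster with a layer per type
names(stacked)                 # "trees" "water"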