Skip to content

Commit

Permalink
Merge pull request #67 from ecohealthalliance/patch/wahis-data-update
Browse files Browse the repository at this point in the history
get updated wahisdb data using read_csv approach
  • Loading branch information
emmamendelsohn authored Nov 10, 2023
2 parents 917baeb + f196408 commit bfecfa3
Show file tree
Hide file tree
Showing 7 changed files with 828 additions and 776 deletions.
19 changes: 19 additions & 0 deletions R/get_wahis_rvf_controls_raw.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#' Download raw WAHIS six-month controls data for Rift Valley fever
#'
#' Reads the full `wahis_six_month_controls` table from the ecohealthalliance
#' wahisdb Dolthub repository into memory via its CSV endpoint, then filters
#' to Rift Valley fever records only.
#'
#' @title Get raw WAHIS RVF controls data
#' @param url Character scalar; CSV endpoint for the controls table. Defaults
#'   to the main branch of the ecohealthalliance/wahisdb Dolthub repository.
#' @return A tibble of six-month control records restricted to
#'   `standardized_disease_name == "rift valley fever"`.
#' @author Emma Mendelsohn
#' @export
get_wahis_rvf_controls_raw <- function(url = "https://www.dolthub.com/csv/ecohealthalliance/wahisdb/main/wahis_six_month_controls") {

  # Read full dataset into memory and filter for RVF
  wahis_controls <- read_csv(url) |>
    filter(standardized_disease_name == "rift valley fever")

  return(wahis_controls)
}
114 changes: 59 additions & 55 deletions R/get_wahis_rvf_outbreaks_raw.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,63 +8,67 @@
#' @author Emma Mendelsohn
#' @export
get_wahis_rvf_outbreaks_raw <- function() {

  # Initialize API download parameters
  offset <- 0
  limit <- 200
  # Accumulate each page in a list and bind once at the end —
  # bind_rows() inside the loop re-copies the whole frame every iteration
  pages <- list()

  # Authentication header is loop-invariant; build it once.
  # Requires the DOLTHUB_API_KEY environment variable to be set.
  headers <- add_headers("authorization" = glue::glue("token {Sys.getenv('DOLTHUB_API_KEY')}"))

  # Repeat the query until all data is downloaded
  while (TRUE) {

    # Build the paged SQL-API url for the current offset
    url <- wahis_rvf_query(offset)

    # Make the API request, retrying up to 3 times on transient failure
    res <- RETRY("POST", url = url, headers = headers, encode = "json", times = 3)

    # Check if the request was successful
    if (res$status_code != 200) {
      stop("API request failed with status code: ", res$status_code)
    }

    # Parse the JSON response
    dat <- fromJSON(content(res, as = "text"))

    # Collect this page of rows
    pages[[length(pages) + 1]] <- dat$rows

    # Increment the offset for the next page
    offset <- offset + limit

    # Be polite to the API between requests
    Sys.sleep(1)

    # A short page means all data has been downloaded
    if (nrow(dat$rows) < limit) {
      break
    }
  }

  outbreaks <- bind_rows(pages)

  return(outbreaks)

}

# function to run query with variable offset
# Build the Dolthub SQL-API query URL for one page of RVF outbreak results.
#
# The diff residue previously spliced into this function (an early
# `return(wahis_outbreaks)` from the read_csv implementation plus a large
# commented archive) made it read the CSV and exit before ever building the
# URL; everything below the early return was dead code. Reconstructed here
# as the coherent URL-building helper.
#
# offset: integer row offset for pagination (pages are 200 rows wide).
# Returns: a character URL with the SQL query encoded as the `q` parameter.
wahis_rvf_query <- function(offset){

  endpoint <- "https://www.dolthub.com/api/v1alpha1/ecohealthalliance/wahisdb/main"

  # Join epi events to their outbreaks, keep RVF only, and page 200 rows
  # at a time ordered by the stable unique event id
  query <- glue::glue(
    "SELECT we.*, wo.*
    FROM wahis_epi_events we
    JOIN wahis_outbreaks wo
    ON wo.epi_event_id_unique = we.epi_event_id_unique
    WHERE we.standardized_disease_name = 'rift valley fever'
    ORDER BY we.epi_event_id_unique
    LIMIT 200
    OFFSET {offset}")

  # Encode the SQL as the `q` query-string parameter of the endpoint
  url <- param_set(endpoint, key = "q", value = url_encode(query))
  return(url)
}

# function to run query with variable offset
# wahis_rvf_query <- function(offset){
#
# endpoint <- "https://www.dolthub.com/api/v1alpha1/ecohealthalliance/wahisdb/main"
# query <- glue::glue("SELECT *
# FROM `wahis_outbreaks`
# WHERE standardized_disease_name = 'rift valley fever'
# ORDER BY epi_event_id_unique
# LIMIT 200
# OFFSET {offset}")
#
# url <- param_set(endpoint, key = "q", value = url_encode(query))
# return(url)
# }
22 changes: 22 additions & 0 deletions R/preprocess_wahis_rvf_controls.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#' Preprocess WAHIS RVF controls data
#'
#' Standardizes country names, attaches ISO3 codes and continent labels via
#' `countrycode`, and restricts the records to African countries.
#'
#' @title Preprocess WAHIS RVF controls
#' @param wahis_rvf_controls_raw Tibble of raw WAHIS six-month control
#'   records already filtered to Rift Valley fever, with a `country` column.
#' @return A tibble of RVF control records for African countries, with
#'   `iso_code` (ISO3) and `continent` columns added.
#' @author Emma Mendelsohn
#' @export
preprocess_wahis_rvf_controls <- function(wahis_rvf_controls_raw) {

  wahis_rvf_controls <- wahis_rvf_controls_raw |>
    # Fix the one known non-standard spelling. Note: recode()'s default
    # argument is `.default`, so the previous `default = country` was not a
    # default at all — it attempted to recode the literal value "default" to a
    # whole vector. For character input, unmatched values pass through
    # unchanged, so no default is needed.
    mutate(country = recode(country, "central african (rep.)" = "central african republic")) |>
    mutate(iso_code = countrycode::countrycode(country, origin = "country.name", destination = "iso3c"),
           continent = countrycode::countrycode(country, origin = "country.name", destination = "continent")) |>
    filter(continent == "Africa")

  return(wahis_rvf_controls)

}
5 changes: 3 additions & 2 deletions R/preprocess_wahis_rvf_outbreaks.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,11 @@
#' @author Emma Mendelsohn
preprocess_wahis_rvf_outbreaks <- function(wahis_rvf_outbreaks_raw) {

wahis_rvf_outbreaks_raw$continent <- countrycode::countrycode(wahis_rvf_outbreaks_raw$iso_code, origin = "iso3c", destination = "continent")
wahis_rvf_outbreaks_raw$continent <- countrycode::countrycode(wahis_rvf_outbreaks_raw$country_unique_code, origin = "iso3c", destination = "continent")
wahis_rvf_outbreaks <- wahis_rvf_outbreaks_raw |>
filter(continent == "Africa") |>
mutate(iso_code = toupper(iso_code))
mutate(iso_code = toupper(country_unique_code)) |>
select(-country_unique_code)

return(wahis_rvf_outbreaks)

Expand Down
4 changes: 4 additions & 0 deletions _targets.R
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,10 @@ dynamic_targets <- tar_plan(
tar_target(wahis_rvf_outbreaks_preprocessed,
preprocess_wahis_rvf_outbreaks(wahis_rvf_outbreaks_raw)),

tar_target(wahis_rvf_controls_raw, get_wahis_rvf_controls_raw()),
tar_target(wahis_rvf_controls_preprocessed,
preprocess_wahis_rvf_controls(wahis_rvf_controls_raw)),

# SENTINEL NDVI -----------------------------------------------------------
# 2018-present
# 10 day period
Expand Down
24 changes: 14 additions & 10 deletions _targets/meta/meta

Large diffs are not rendered by default.

Loading

0 comments on commit bfecfa3

Please sign in to comment.