Skip to content

Commit

Permalink
get updated wahisdb data using read_csv approach
Browse files Browse the repository at this point in the history
  • Loading branch information
emmamendelsohn committed Nov 10, 2023
1 parent 917baeb commit cf7e350
Show file tree
Hide file tree
Showing 4 changed files with 777 additions and 776 deletions.
112 changes: 57 additions & 55 deletions R/get_wahis_rvf_outbreaks_raw.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,63 +8,65 @@
#' @author Emma Mendelsohn
#' @export
get_wahis_rvf_outbreaks_raw <- function() {

  # Download all Rift Valley fever outbreak rows from the DoltHub SQL API,
  # one page at a time, and return them combined into a single tibble.
  #
  # Each page is requested from the URL built by wahis_rvf_query(offset).
  # Stops with an error if a request still returns a non-200 status after
  # the RETRY attempts.

  # Initialize API download parameters.
  # `limit` is the page size; it must match the LIMIT hard-coded in the
  # query built by wahis_rvf_query().
  limit <- 200
  offset <- 0
  pages <- list()

  # httr merges only *unnamed* arguments (or `config =`) into the request;
  # named arguments in `...` are forwarded to modify_url(). Passing the
  # header object as `headers = ...` therefore never sent it. Build the
  # header once and hand it over through `config`. The header is attached
  # only when a key is actually set, so unauthenticated access keeps working.
  api_key <- Sys.getenv("DOLTHUB_API_KEY")
  request_config <- if (nzchar(api_key)) {
    add_headers("authorization" = paste("token", api_key))
  } else {
    list()
  }

  # Repeat the query until all data is downloaded
  repeat {

    # Set the url call
    url <- wahis_rvf_query(offset)

    # Make the API request
    # NOTE(review): the commented-out revision further down this file uses
    # "GET" for the same endpoint -- confirm which verb the API expects.
    res <- RETRY(
      "POST",
      url = url,
      config = request_config,
      encode = "json",
      times = 3
    )

    # Check if the request was successful
    if (res$status_code != 200) {
      stop(
        "API request failed with status code: ", res$status_code,
        call. = FALSE
      )
    }

    # Parse the JSON response
    dat <- fromJSON(content(res, as = "text"))
    page <- dat$rows

    # NROW() is 0 for NULL or an empty list, whereas nrow() returns NULL
    # there and would make the comparison below fail with a zero-length
    # condition (e.g. when the row total is an exact multiple of `limit`).
    n_rows <- NROW(page)
    if (n_rows > 0) {
      pages[[length(pages) + 1]] <- page
    }

    # A short (or empty) page means everything has been downloaded
    if (n_rows < limit) {
      break
    }

    # Advance to the next page and pause briefly between requests
    offset <- offset + limit
    Sys.sleep(1)
  }

  # Bind once at the end instead of re-copying the result on every page;
  # leading with tibble() keeps the return type a tibble, as before.
  outbreaks <- bind_rows(tibble(), pages)

  return(outbreaks)

}

# Function taking a paging `offset`.
#
# As written, the body downloads the `wahis_outbreaks` table as CSV from
# DoltHub, keeps the rows whose `standardized_disease_name` is
# "rift valley fever", and returns that filtered table.
#
# NOTE(review): this body appears to interleave two revisions -- a direct
# CSV download and a paginated SQL-API URL builder. The first `return()`
# makes the URL-building statements after it unreachable, while
# get_wahis_rvf_outbreaks_raw() passes this function's result to RETRY() as
# a URL. Confirm which behavior is intended before relying on it.
#
# @param offset Row offset interpolated into the SQL text below; the
#   resulting `query` is not used before the function returns.
# @return The filtered `wahis_outbreaks` table.
wahis_rvf_query <- function(offset){
# Read full dataset into memory and filter for RVF
wahis_outbreaks <- read_csv("https://www.dolthub.com/csv/ecohealthalliance/wahisdb/main/wahis_outbreaks") |>
filter(standardized_disease_name == "rift valley fever")

# Commented-out paginated API download loop (not executed):
# # initialize API download parameters
# offset <- 0
# limit <- 200
# outbreaks <- tibble()
#
# # Repeat the query until all data is downloaded
# while(TRUE) {
#
# # Set the url call
# url <- wahis_rvf_query(offset)
#
# headers <- add_headers("authorization" = glue::glue("token {Sys.getenv('DOLTHUB_API_KEY')}"))
#
# # Make the API request
# res <- RETRY("GET", url = url, encode = "json", times = 3)
#
# # Check if the request was successful
# if (res$status_code != 200) {
# stop("API request failed with status code: ", res$status_code)
# }
#
# # Parse the JSON response
# dat <- fromJSON(content(res, as = "text"))
#
# # Add to the dataframe
# outbreaks <- bind_rows(outbreaks, dat$rows)
#
# # Increment the offset
# offset <- offset + limit
#
# Sys.sleep(1)
#
# # Check if all data has been downloaded
# if (nrow(dat$rows) < limit) {
# break
# }
# }

# SQL-API endpoint and a query joining epi events to outbreaks for RVF;
# glue fills `{offset}` with the argument, and the page size is fixed at 200.
# NOTE(review): `endpoint` and `query` are only consumed by the unreachable
# `url <- ...` statement below.
endpoint <- "https://www.dolthub.com/api/v1alpha1/ecohealthalliance/wahisdb/main"
query <- glue::glue(
"SELECT we.*, wo.*
FROM wahis_epi_events we
JOIN wahis_outbreaks wo
ON wo.epi_event_id_unique = we.epi_event_id_unique
WHERE we.standardized_disease_name = 'rift valley fever'
ORDER BY we.epi_event_id_unique
LIMIT 200
OFFSET {offset}")
# The function exits here with the CSV-derived table.
return(wahis_outbreaks)

# NOTE(review): unreachable -- placed after the return() above. Would attach
# the URL-encoded query to `endpoint` as the `q` parameter and return the URL.
url <- param_set(endpoint, key = "q", value = url_encode(query))
return(url)
}

# function to run query with variable offset
# wahis_rvf_query <- function(offset){
#
# endpoint <- "https://www.dolthub.com/api/v1alpha1/ecohealthalliance/wahisdb/main"
# query <- glue::glue("SELECT *
# FROM `wahis_outbreaks`
# WHERE standardized_disease_name = 'rift valley fever'
# ORDER BY epi_event_id_unique
# LIMIT 200
# OFFSET {offset}")
#
# url <- param_set(endpoint, key = "q", value = url_encode(query))
# return(url)
# }
5 changes: 3 additions & 2 deletions R/preprocess_wahis_rvf_outbreaks.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,11 @@
#' @author Emma Mendelsohn
preprocess_wahis_rvf_outbreaks <- function(wahis_rvf_outbreaks_raw) {

wahis_rvf_outbreaks_raw$continent <- countrycode::countrycode(wahis_rvf_outbreaks_raw$iso_code, origin = "iso3c", destination = "continent")
wahis_rvf_outbreaks_raw$continent <- countrycode::countrycode(wahis_rvf_outbreaks_raw$country_unique_code, origin = "iso3c", destination = "continent")
wahis_rvf_outbreaks <- wahis_rvf_outbreaks_raw |>
filter(continent == "Africa") |>
mutate(iso_code = toupper(iso_code))
mutate(iso_code = toupper(country_unique_code)) |>
select(-country_unique_code)

return(wahis_rvf_outbreaks)

Expand Down
Loading

0 comments on commit cf7e350

Please sign in to comment.