Skip to content

Commit

Permalink
enable pulling locus data from edi, #24
Browse files Browse the repository at this point in the history
  • Loading branch information
jsta committed Nov 14, 2021
1 parent 3ce440b commit 21c5d1e
Show file tree
Hide file tree
Showing 6 changed files with 71 additions and 60 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,4 @@ taxonomy/*.xlsx*
docs
pkgdown/
rebuild.R
README.html
1 change: 1 addition & 0 deletions R/compile.R
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ lagosus_compile <- function(
message(paste0("locus module version ", locus_version, " already exists at: ",
dest_folder))
}

if (!is.na(locus_folder) & (!file.exists(locus_path) | locus_overwrite)) {
pb$tick(tokens = list(type = "locus data"))
locus <- lagos_ingest(locus_folder = locus_folder)
Expand Down
68 changes: 41 additions & 27 deletions R/get.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
#' @inheritParams lagosus_compile
#' @examples \dontrun{
#' # default to latest version
#' lagosus_get(dest_folder = LAGOSUS:::lagosus_path())
#' lagosus_get(dest_folder = LAGOSUS::lagosus_path())
#'
#' # get previous version(s)
#' # - recommended to install corresponding package version
Expand All @@ -20,55 +20,69 @@ lagosus_get <- function(locus_version = NA, locus_folder = NA, locus_overwrite =
dest_folder = NA){

if(dest_folder != lagosus_path()){
warning("Set dest_folder to LAGOSNE:::lagosus_path() so that data persists
warning("Set dest_folder to LAGOSNE::lagosus_path() so that data persists
between R sessions. \n")
}

outpath <- file.path(lagosus_path(), paste0("data_", version, ".rds"))
if(file.exists(outpath) & !locus_overwrite){
warning("LAGOSUS data for this version already exists on the local machine.
Re-download if neccessary using the 'overwrite` argument.'")
return(invisible("LAGOS is the best"))

# Check whether a compiled file for `module` at `version` already exists
# under lagosus_path(), using the file name "<module>_<version>.rds".
#
# module:  module name used as the file-name prefix (called with "locus").
# version: version used in the file name.
#
# When the file exists and `locus_overwrite` is FALSE, emits a warning and
# invisibly returns a placeholder string; otherwise nothing is returned
# (invisible NULL from the untaken `if`).
# `locus_overwrite` is not a parameter here; it is read from the enclosing
# scope.
# NOTE(review): the return() below exits check_version() only. The visible
# call site does not use the result, so this does not appear to stop the
# download that follows -- confirm whether an early exit was intended.
# NOTE(review): the visible call passes locus_version before its NA default
# is resolved, so the path checked may be "<module>_NA.rds" -- confirm.
check_version <- function(module, version) {
outpath <- file.path(lagosus_path(), paste0(module, "_", version, ".rds"))
if(file.exists(outpath) & !locus_overwrite) {
warning("LAGOSUS data for this version already exists on the local machine.
Re-download if neccessary using the 'overwrite` argument.'")
return(invisible("LAGOS is the best"))
}
}

if(version != lagosus_version()){
warning(
paste0("Specified version '", version, "' does not match the most recent LAGOSUS version '", lagosus_version(), "' - If an older LAGOSUS version is desired, see the 'Legacy Versions' section of the README for instructions."))
# Warn when a user-specified `version` differs from what lagosus_version()
# reports. Nothing happens when `version` is NA (no version was requested).
#
# module:  not used in the body.
# version: the requested version; compared against lagosus_version().
#
# NOTE(review): elsewhere in this view lagosus_version() is passed to
# dplyr::filter() with a `modules` column, which suggests it returns a table
# rather than a single string. If so, the comparison below and the version
# pasted into the message cover every module, not just `module` -- confirm.
check_latest <- function(module, version) {
if(all(version != lagosus_version()) & !is.na(version)) {
warning(
paste0("Specified version '", version,
"' does not match the most recent LAGOSUS version '",
lagosus_version(), "' - If an older LAGOSUS version is desired, see the 'Legacy Versions' section of the README for instructions."))
}
}

edi_baseurl <- "https://portal.edirepository.org/nis/dataviewer?packageid="
pasta_baseurl <- "http://pasta.lternet.edu/package/data/eml/edi/"

message("Downloading the 'locus' module ...")
locus_base_edi <- paste0(edi_baseurl, c("edi.100.4"))
locus_base_pasta <- paste0(pasta_baseurl, "100/4")
check_version("locus", locus_version)
check_latest("locus", locus_version)
if(is.na(locus_version)) {
locus_version <- dplyr::pull(
dplyr::filter(lagosus_version(), modules == "locus"), "versions")
}
locus_base_edi <- paste0(edi_baseurl, c("edi.854.1"))
locus_base_pasta <- paste0(pasta_baseurl, "854/1")
locus_dir <- get_lagos_module(locus_base_edi, locus_base_pasta,
"locus", locus_overwrite)

message("Downloading the 'limno' module ...")
limno_base_edi <- paste0(edi_baseurl, c("edi.101.3"))
limno_base_pasta <- paste0(pasta_baseurl, "101/3")
limno_dir <- get_lagos_module(limno_base_edi, limno_base_pasta,
"limno", limno_overwrite)
# message("Downloading the 'limno' module ...")
# limno_base_edi <- paste0(edi_baseurl, c("edi.101.3"))
# limno_base_pasta <- paste0(pasta_baseurl, "101/3")
# limno_dir <- get_lagos_module(limno_base_edi, limno_base_pasta,
# "limno", limno_overwrite)

message("Downloading the 'geo' module ...")
geo_base_edi <- paste0(edi_baseurl, c("edi.99.5"))
geo_base_pasta <- paste0(pasta_baseurl, "99/5")
geo_dir <- get_lagos_module(geo_base_edi, geo_base_pasta,
"geo", geo_overwrite)
# message("Downloading the 'geo' module ...")
# geo_base_edi <- paste0(edi_baseurl, c("edi.99.5"))
# geo_base_pasta <- paste0(pasta_baseurl, "99/5")
# geo_dir <- get_lagos_module(geo_base_edi, geo_base_pasta,
# "geo", geo_overwrite)

dir.create(dest_folder, showWarnings = FALSE)

message("LAGOSNE downloaded. Now compressing to native R object ...")
message("LAGOSUS downloaded. Now compressing to native R object ...")

lagosus_compile(locus_version = locus_version,
locus_folder = locus_dir,
limno_folder = limno_dir,
geo_folder = geo_dir,
locus_overwrite = locus_overwrite,
limno_folder = NA,
geo_folder = NA,
dest_folder = dest_folder
)

return(invisible(list(locus_folder = locus_dir,
limno_folder = limno_dir,
geo_folder = geo_dir)))
limno_folder = NA,
geo_folder = NA)))
}
8 changes: 4 additions & 4 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ query_column_keywords <- function(dt, table_name, keyword_string){

#' @importFrom curl curl_fetch_memory
#' @importFrom stringr str_extract
get_file_names <- function(url){
get_file_names <- function(url) {
handle <- curl::new_handle(nobody = TRUE)

headers <- curl::parse_headers(
Expand All @@ -209,7 +209,7 @@ get_file_names <- function(url){
get_lagos_module <- function(edi_url, pasta_url, folder_name, overwrite){

files <- suppressWarnings(paste0(edi_url, "&entityid=",
readLines(pasta_url)))
readLines(pasta_url)))
file_names <- sapply(files, get_file_names)

files <- files[!is.na(file_names)]
Expand All @@ -219,9 +219,9 @@ get_lagos_module <- function(edi_url, pasta_url, folder_name, overwrite){
dir.create(local_dir, showWarnings = FALSE)

file_paths <- file.path(local_dir, file_names)

invisible(lapply(seq_len(length(files)),
function(i){
function(i) {
message(paste0("Downloading ", file_names[i], " ..."))
get_if_not_exists(files[i], file_paths[i], overwrite)
}))
Expand Down
10 changes: 8 additions & 2 deletions README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -33,21 +33,27 @@ devtools::install_git(

### Data

Until the LAGOS-US datasets have been made available in a public repository, LAGOSUS users will need to use the `lagosus_compile` function (not `lagosus_get`) and supply the path to their local `locus`, `limno`, `geo`, and `depth` data folders. Replace the paths in the example below with the path to each respective folder on your system. Most people will have access to these folders through Dropbox. For example, the `locus_folder` would be assigned to something like: `C:/Users/FWL/Dropbox/CL_LAGOSUS_exports/LAGOSUS_LOCUS`
Currently only the "locus" module of LAGOS-US has been released in a public repository. Members of the development team who have access to unreleased modules (limno, geo, etc.) will need to use the `lagosus_compile` function (not `lagosus_get`) and supply the path to their local `locus`, `limno`, `geo`, or `depth` data folders. Replace the paths in the example below with the path to each respective folder on your system. Most people will have access to these folders through Dropbox. For example, the `locus_folder` would be assigned to something like: `C:/Users/FWL/Dropbox/CL_LAGOSUS_exports/LAGOSUS_LOCUS`

Files are "compiled" to an `R` data format in the location specified by the `dest_folder` argument. Recommended setting is `lagosus_path()`. Data only needs to be downloaded one time per version per machine. Each `LAGOSUS` [module](https://cont-limno.github.io/LAGOSUS/articles/lagosus_structure.html) has a unique version number.

<!-- dir("../../../Downloads/") -->
```{r eval=FALSE}
library(LAGOSUS)
# only the locus module is currently public
lagosus_get(dest_folder = lagosus_path())
# an example for members of the dev team to specify local data folder paths
lagosus_compile(
locus_version = "1.0",
locus_folder = "~/Downloads/LAGOSUS_LOCUS/LOCUS_v1.0",
limno_version = "2.1",
limno_folder = "~/Downloads/LAGOSUS_LIMNO/US/LIMNO_v2.1/Final exports",
depth_version = "0.1",
depth_folder = "~/Downloads/LAGOSUS_DEPTH/DEPTH_v0.1",
depth_folder = "~/Downloads/LAGOSUS_DEPTH/DEPTH_v0.1",
geo_version = "1.0",
geo_folder = "~/Downloads/LAGOSUS_GEO/GEO_EXPORT_BETA_v1",
dest_folder = lagosus_path())
```

Expand Down
43 changes: 16 additions & 27 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,14 +37,15 @@ devtools::install_git(

### Data

Until the LAGOS-US datasets have been made available in a public
repository, LAGOSUS users will need to use the `lagosus_compile`
function (not `lagosus_get`) and supply the path to their local `locus`,
`limno`, `geo`, and `depth` data folders. Replace the paths in the
example below with the path to each respective folder on your system.
Most people will have access to these folders through Dropbox. For
example, the `locus_folder` would be assigned to something like:
`C:/Users/FWL/Dropbox/CL_LAGOSUS_exports/LAGOSUS_LOCUS`
Currently only the “locus” module of LAGOS-US has been released in a
public repository. Members of the development team who have access to
unreleased modules (limno, geo, etc.) will need to use the
`lagosus_compile` function (not `lagosus_get`) and supply the path to
their local `locus`, `limno`, `geo`, or `depth` data folders. Replace
the paths in the example below with the path to each respective folder
on your system. Most people will have access to these folders through
Dropbox. For example, the `locus_folder` would be assigned to something
like: `C:/Users/FWL/Dropbox/CL_LAGOSUS_exports/LAGOSUS_LOCUS`

Files are “compiled” to an `R` data format in the location specified by
the `dest_folder` argument. Recommended setting is `lagosus_path()`.
Expand All @@ -58,13 +59,19 @@ has a unique version number.
``` r
library(LAGOSUS)

# only the locus module is currently public
lagosus_get(dest_folder = lagosus_path())

# an example for members of the dev team to specify local data folder paths
lagosus_compile(
locus_version = "1.0",
locus_folder = "~/Downloads/LAGOSUS_LOCUS/LOCUS_v1.0",
limno_version = "2.1",
limno_folder = "~/Downloads/LAGOSUS_LIMNO/US/LIMNO_v2.1/Final exports",
depth_version = "0.1",
depth_folder = "~/Downloads/LAGOSUS_DEPTH/DEPTH_v0.1",
depth_folder = "~/Downloads/LAGOSUS_DEPTH/DEPTH_v0.1",
geo_version = "1.0",
geo_folder = "~/Downloads/LAGOSUS_GEO/GEO_EXPORT_BETA_v1",
dest_folder = lagosus_path())
```

Expand All @@ -88,25 +95,15 @@ names(lg)
```

<!-- ```{r load_data_cached, eval=FALSE, echo=FALSE} -->

<!-- dt <- readRDS(system.file("lagos_test_subset.rds", package = "LAGOSUS")) -->

<!-- names(dt) -->

<!-- ``` -->

<!-- #### Locate tables containing a variable -->

<!-- ```{r eval=FALSE} -->

<!-- query_lagos_names("secchi") -->

<!-- ``` -->

<!-- ```{r echo=FALSE, eval=FALSE} -->

<!-- query_lagos_names("secchi", dt = dt) -->

<!-- ``` -->

#### Preview a table
Expand Down Expand Up @@ -140,21 +137,13 @@ query_lagos_names("ws_meanwidth", dt = lg)
```

<!-- ```{r load printr, echo=FALSE,message=FALSE,results='hide', eval=FALSE} -->

<!-- loadNamespace("printr") -->

<!-- ``` -->

<!-- ```{r Read metadata for individual tables, eval=FALSE} -->

<!-- help.search("datasets", package = "LAGOSUS") -->

<!-- ``` -->

<!-- ```{r unload printr, echo=FALSE, eval=FALSE} -->

<!-- unloadNamespace("printr") -->

<!-- ``` -->

## Legacy Versions
Expand Down

0 comments on commit 21c5d1e

Please sign in to comment.