From e089719073f008528b3f1af00cfb181625ae2794 Mon Sep 17 00:00:00 2001 From: Matt Brousil <37380883+mbrousil@users.noreply.github.com> Date: Fri, 23 Aug 2024 17:08:59 -0700 Subject: [PATCH 01/35] Add sf and tribal query options to TADA_DataRetrieval --- R/DataDiscoveryRetrieval.R | 771 ++++++++++++++++++++++++++++--------- 1 file changed, 584 insertions(+), 187 deletions(-) diff --git a/R/DataDiscoveryRetrieval.R b/R/DataDiscoveryRetrieval.R index e0478e95..a9fc3ea3 100644 --- a/R/DataDiscoveryRetrieval.R +++ b/R/DataDiscoveryRetrieval.R @@ -5,7 +5,9 @@ #' exceptions of endDate and startDate match the web service call format from the #' online WQP GUI. endDate and startDate match the format suggested in USGS's #' dataRetrieval package (endDate = "YYYY-MM-DD"), which is a more familiar date -#' format for R users than the WQP GUI's endDateHi = "MM-DD-YYYY". +#' format for R users than the WQP GUI's endDateHi = "MM-DD-YYYY". aoi_sf, +#' tribal_area_type, and tribe_name_parcel do not have corresponding inputs in +#' the web service. #' #' Multiple fields are queried together using AND logic, but multiple values within #' one field are queried together using OR logic. For example, within @@ -17,7 +19,11 @@ #' of the query fields. #' characteristicName and Characteristic Group also work as an AND, therefore the #' characteristicName must fall within the Characteristic Group when both are entered. -#' +#' +#' aoi_sf cannot be used with tribal_area_type. If countrycode, countycode, huc, +#' siteid, or statecode are used with aoi_sf or tribal_area_type they will be ignored +#' under the assumption that the sf object or tribal location are the intended +#' area of interest. 
#' #' Users can reference the \href{https://www.epa.gov/waterdata/storage-and-retrieval-and-water-quality-exchange-domain-services-and-downloads}{WQX domain tables} #' to find allowable values for queries, e.g., reference the WQX domain table to find countycode and statecode: https://cdx.epa.gov/wqx/download/DomainValues/County_CSV.zip @@ -33,12 +39,15 @@ #' #' @param startDate Start Date string in the format YYYY-MM-DD, for example, "2020-01-01" #' @param endDate End Date string in the format YYYY-MM-DD, for example, "2020-01-01" +#' @param aoi_sf An sf object to use for a query area of interest #' @param countrycode Code that identifies a country or ocean (e.g. countrycode = "CA" for Canada, countrycode = "OA" for Atlantic Ocean). See https://www.waterqualitydata.us/Codes/countrycode for options. #' @param statecode FIPS state alpha code that identifies a state (e.g. statecode = "DE" for Delaware). See https://www.waterqualitydata.us/Codes/statecode for options. #' @param countycode FIPS county name. Note that a state code must also be supplied (e.g. statecode = "AL", countycode = "Chilton"). See https://www.waterqualitydata.us/Codes/countycode for options. #' @param huc A numeric code denoting a hydrologic unit. Example: "04030202". Different size hucs can be entered. See https://epa.maps.arcgis.com/home/item.html?id=796992f4588c401fabec7446ecc7a5a3 for a map with HUCS. Click on a HUC to find the associated code. #' @param siteid Unique monitoring location identifier. #' @param siteType Type of waterbody. See https://www.waterqualitydata.us/Codes/sitetype for options. +#' @param tribal_area_type One of the six tribal spatial layers: "Alaska Native Allotments", "Alaska Native Villages", "American Indian Reservations", "Off-reservation Trust Lands", "Oklahoma Tribal Statistical Areas", or "Virginia Federally Recognized Tribes". +#' @param tribe_name_parcel The name of a tribe corresponding to an entry in the TRIBE_NAME field of the specified tribal_area_type. 
OR if the type is Alaska Native Allotments" then the corresponding PARCEL_NO. #' @param characteristicName Name of parameter. See https://www.waterqualitydata.us/Codes/characteristicName for options. #' @param characteristicType Groups of environmental measurements/parameters. See https://www.waterqualitydata.us/Codes/characteristicType for options. #' @param sampleMedia Sampling substrate such as water, air, or sediment. See https://www.waterqualitydata.us/Codes/sampleMedia for options. @@ -49,6 +58,12 @@ #' #' @return TADA-compatible dataframe #' +#' @note +#' Alaska Native Villages and Virginia Federally Recognized Tribes are point +#' geometries in the Map Service, not polygons. At the time of this writing they +#' do not return any data when used for WQP bbox queries and so are set to return +#' errors when used with this function. +#' #' @export #' #' @examples @@ -160,11 +175,14 @@ #' TADA_DataRetrieval <- function(startDate = "null", endDate = "null", + aoi_sf = NULL, countrycode = "null", countycode = "null", huc = "null", siteid = "null", siteType = "null", + tribal_area_type = "null", + tribe_name_parcel = "null", characteristicName = "null", characteristicType = "null", sampleMedia = "null", @@ -173,158 +191,537 @@ TADA_DataRetrieval <- function(startDate = "null", project = "null", providers = "null", applyautoclean = TRUE) { + + + + # Check for incomplete or inconsistent inputs: + + # If both an sf object and tribe information are provided it's unclear what + # the priority should be for the query + if( !is.null(aoi_sf) & + ( (tribal_area_type != "null") | (tribe_name_parcel != "null") ) ){ + stop( + paste0( + "Both sf data and tribal information have been provided. ", + "Please use only one of these query options." + ) + ) + } + + # Check for other arguments that indicate location. 
Function will ignore + # these inputs but warn the user + if( + # sf object provided + (!is.null(aoi_sf) & inherits(aoi_sf, "sf")) & + # with additional location info + any( (countrycode != "null"), (countycode != "null"), (huc != "null"), + (siteid != "null"), (statecode != "null") ) + ){ + warning( + paste0( + "Location information has been provided in addition to an sf object. ", + "Only the sf object will be used in the query." + ) + ) + } else if( + # Tribe info provided + (tribal_area_type != "null") & + # with additional location info + any( (countrycode != "null"), (countycode != "null"), (huc != "null"), + (siteid != "null"), (statecode != "null") ) + ){ + warning( + paste0( + "Location information has been provided in addition to tribal information. ", + "Only the tribal information will be used in the query." + ) + ) + } + + # Insufficient tribal info provided + if( (tribal_area_type == "null") & (tribe_name_parcel != "null") ){ + stop("A tribal_area_type is required if tribe_name_parcel is provided.") + } + + # Set query parameters WQPquery <- list() - - if (!"null" %in% statecode) { - load(system.file("extdata", "statecodes_df.Rdata", package = "EPATADA")) - statecode <- as.character(statecode) - statecodes_sub <- statecodes_df %>% dplyr::filter(STUSAB %in% statecode) - statecd <- paste0("US:", statecodes_sub$STATE) - if (nrow(statecodes_sub) == 0) { - stop("State code is not valid. Check FIPS state/territory abbreviations.") + + + # If an sf object OR tribal info are provided they will be the basis of the query + # (The tribal data handling uses sf objects as well) + if( (!is.null(aoi_sf) & inherits(aoi_sf, "sf")) | (tribal_area_type != "null") ){ + + sf::sf_use_s2(FALSE) + + # Build the non-sf part of the query: + + # StartDate + if (length(startDate) > 1) { + if (is.na(suppressWarnings(lubridate::parse_date_time(startDate[1], orders = "ymd")))) { + stop("Incorrect date format. 
Please use the format YYYY-MM-DD.") + } + WQPquery <- c(WQPquery, startDate = list(startDate)) + } else if (startDate != "null") { + if (is.na(suppressWarnings(lubridate::parse_date_time(startDate, orders = "ymd")))) { + stop("Incorrect date format. Please use the format YYYY-MM-DD.") + } + WQPquery <- c(WQPquery, startDate = startDate) } - if (length(statecode) >= 1) { - WQPquery <- c(WQPquery, statecode = list(statecd)) + # SiteType + if (length(siteType) > 1) { + WQPquery <- c(WQPquery, siteType = list(siteType)) + } else if (siteType != "null") { + WQPquery <- c(WQPquery, siteType = siteType) } - } - - if (length(huc) > 1) { - WQPquery <- c(WQPquery, huc = list(huc)) - } else if (huc != "null") { - WQPquery <- c(WQPquery, huc = huc) - } - - if (length(startDate) > 1) { - if (is.na(suppressWarnings(lubridate::parse_date_time(startDate[1], orders = "ymd")))) { - stop("Incorrect date format. Please use the format YYYY-MM-DD.") + # CharacteristicName + if (length(characteristicName) > 1) { + WQPquery <- c(WQPquery, characteristicName = list(characteristicName)) + } else if (characteristicName != "null") { + WQPquery <- c(WQPquery, characteristicName = characteristicName) } - WQPquery <- c(WQPquery, startDate = list(startDate)) - } else if (startDate != "null") { - if (is.na(suppressWarnings(lubridate::parse_date_time(startDate, orders = "ymd")))) { - stop("Incorrect date format. 
Please use the format YYYY-MM-DD.") + # CharacteristicType + if (length(characteristicType) > 1) { + WQPquery <- c(WQPquery, characteristicType = list(characteristicType)) + } else if (characteristicType != "null") { + WQPquery <- c(WQPquery, characteristicType = characteristicType) } - WQPquery <- c(WQPquery, startDate = startDate) - } - - if (length(countrycode) > 1) { - WQPquery <- c(WQPquery, countrycode = list(countrycode)) - } else if (countrycode != "null") { - WQPquery <- c(WQPquery, countrycode = countrycode) - } - - if (length(countycode) > 1) { - WQPquery <- c(WQPquery, countycode = list(countycode)) - } else if (countycode != "null") { - WQPquery <- c(WQPquery, countycode = countycode) - } - - if (length(siteid) > 1) { - WQPquery <- c(WQPquery, siteid = list(siteid)) - } else if (siteid != "null") { - WQPquery <- c(WQPquery, siteid = siteid) - } - - if (length(siteType) > 1) { - WQPquery <- c(WQPquery, siteType = list(siteType)) - } else if (siteType != "null") { - WQPquery <- c(WQPquery, siteType = siteType) - } - - if (length(characteristicName) > 1) { - WQPquery <- c(WQPquery, characteristicName = list(characteristicName)) - } else if (characteristicName != "null") { - WQPquery <- c(WQPquery, characteristicName = characteristicName) - } - - if (length(characteristicType) > 1) { - WQPquery <- c(WQPquery, characteristicType = list(characteristicType)) - } else if (characteristicType != "null") { - WQPquery <- c(WQPquery, characteristicType = characteristicType) - } - - if (length(sampleMedia) > 1) { - WQPquery <- c(WQPquery, sampleMedia = list(sampleMedia)) - } else if (sampleMedia != "null") { - WQPquery <- c(WQPquery, sampleMedia = sampleMedia) - } - - if (length(project) > 1) { - WQPquery <- c(WQPquery, project = list(project)) - } else if (project != "null") { - WQPquery <- c(WQPquery, project = project) - } - - if (length(providers) > 1) { - WQPquery <- c(WQPquery, providers = list(providers)) - } else if (providers != "null") { - WQPquery <- 
c(WQPquery, providers = providers) - } - - if (length(organization) > 1) { - WQPquery <- c(WQPquery, organization = list(organization)) - } else if (organization != "null") { - WQPquery <- c(WQPquery, organization = organization) - } - - if (length(endDate) > 1) { - if (is.na(suppressWarnings(lubridate::parse_date_time(endDate[1], orders = "ymd")))) { - stop("Incorrect date format. Please use the format YYYY-MM-DD.") + # SampleMedia + if (length(sampleMedia) > 1) { + WQPquery <- c(WQPquery, sampleMedia = list(sampleMedia)) + } else if (sampleMedia != "null") { + WQPquery <- c(WQPquery, sampleMedia = sampleMedia) } - WQPquery <- c(WQPquery, endDate = list(endDate)) - } else if (endDate != "null") { - if (is.na(suppressWarnings(lubridate::parse_date_time(endDate, orders = "ymd")))) { - stop("Incorrect date format. Please use the format YYYY-MM-DD.") + # Project + if (length(project) > 1) { + WQPquery <- c(WQPquery, project = list(project)) + } else if (project != "null") { + WQPquery <- c(WQPquery, project = project) } - WQPquery <- c(WQPquery, endDate = endDate) - } - - # Retrieve all 3 profiles - print("Downloading WQP query results. This may take some time depending upon the query size.") - print(WQPquery) - results.DR <- dataRetrieval::readWQPdata(WQPquery, - dataProfile = "resultPhysChem", - ignore_attributes = TRUE - ) - # check if any results are available - if ((nrow(results.DR) > 0) == FALSE) { - print("Returning empty results dataframe: Your WQP query returned no results (no data available). Try a different query. 
Removing some of your query filters OR broadening your search area may help.") - TADAprofile.clean <- results.DR - } else { - sites.DR <- dataRetrieval::whatWQPsites(WQPquery) - - projects.DR <- dataRetrieval::readWQPdata(WQPquery, - ignore_attributes = TRUE, - service = "Project" + # Provider + if (length(providers) > 1) { + WQPquery <- c(WQPquery, providers = list(providers)) + } else if (providers != "null") { + WQPquery <- c(WQPquery, providers = providers) + } + # Organization + if (length(organization) > 1) { + WQPquery <- c(WQPquery, organization = list(organization)) + } else if (organization != "null") { + WQPquery <- c(WQPquery, organization = organization) + } + # EndDate + if (length(endDate) > 1) { + if (is.na(suppressWarnings(lubridate::parse_date_time(endDate[1], orders = "ymd")))) { + stop("Incorrect date format. Please use the format YYYY-MM-DD.") + } + WQPquery <- c(WQPquery, endDate = list(endDate)) + } else if (endDate != "null") { + if (is.na(suppressWarnings(lubridate::parse_date_time(endDate, orders = "ymd")))) { + stop("Incorrect date format. 
Please use the format YYYY-MM-DD.") + } + WQPquery <- c(WQPquery, endDate = endDate) + } + + # sf AOI prep for query + + # If tribe info is provided then grab the corresponding sf object: + if(tribal_area_type != "null"){ + + # Make a reference table for tribal area type + url matching + # (options that don't return results are commented out for now) + map_service_urls <- tibble::tribble( + ~tribal_area, ~url, + "Alaska Native Allotments", "https://geopub.epa.gov/arcgis/rest/services/EMEF/Tribal/MapServer/0", + # "Alaska Native Villages", "https://geopub.epa.gov/arcgis/rest/services/EMEF/Tribal/MapServer/1", + "American Indian Reservations", "https://geopub.epa.gov/arcgis/rest/services/EMEF/Tribal/MapServer/2", + "Off-reservation Trust Lands", "https://geopub.epa.gov/arcgis/rest/services/EMEF/Tribal/MapServer/3", + "Oklahoma Tribal Statistical Areas", "https://geopub.epa.gov/arcgis/rest/services/EMEF/Tribal/MapServer/4"# , + # "Virginia Federally Recognized Tribes", "https://geopub.epa.gov/arcgis/rest/services/EMEF/Tribal/MapServer/5" + ) + + # Keep to a single type: + if(length(tribal_area_type) > 1){ + stop("tribal_area_type must be of length 1.") + } + + # These two layers will not return any data when used for bboxes + if(tribal_area_type == "Alaska Native Villages"){ + stop("Alaska Native Villages data are centroid points, not spatial boundaries.") + } else if(tribal_area_type == "Virginia Federally Recognized Tribes") { + stop("Federally recognized tribal entities in Virginia do not have any available spatial boundaries.") + } + + # These area types allow filtering by TRIBE_NAME (unique within each type) + if(tribal_area_type %in% c( + # "Alaska Native Villages", + "American Indian Reservations", + "Off-reservation Trust Lands", + "Oklahoma Tribal Statistical Areas"#, + # "Virginia Federally Recognized Tribes" + ) + ){ + + # Get the relevant url + aoi_sf <- filter(map_service_urls, + tribal_area == tribal_area_type)$url %>% + # Pull data + 
arcgislayers::arc_open() %>% + # Return sf + arcgislayers::arc_select() %>% + # If a value provided, then filter + {if ((tribe_name_parcel != "null") & (tribe_name_parcel != "null")) { + filter(., TRIBE_NAME %in% tribe_name_parcel) + } else { + . + }} + + # Otherwise filter by PARCEL_NO (Note that values in this col are not unique) + } else if(tribal_area_type == "Alaska Native Allotments"){ + + aoi_sf <- filter(map_service_urls, + tribal_area == tribal_area_type)$url %>% + arcgislayers::arc_open() %>% + arcgislayers::arc_select() %>% + {if ((tribe_name_parcel != "null") & (tribe_name_parcel != "null")) { + filter(., PARCEL_NO %in% tribe_name_parcel) + } else { + . + }} + + } else { + stop("Tribal area type not recognized. Refer to TADA_TribalOptions() for query options.") + } + + } + + # Match CRS + if(sf::st_crs(aoi_sf) != 4326){ + aoi_sf <- sf::st_transform(aoi_sf, crs = 4326) + } + + # Get bbox of the sf object + input_bbox <- sf::st_bbox(aoi_sf) + + # Query site info within the bbox + bbox_sites <- dataRetrieval::whatWQPsites( + WQPquery, + bBox = c(input_bbox$xmin, input_bbox$ymin, input_bbox$xmax, input_bbox$ymax) ) - - TADAprofile <- TADA_JoinWQPProfiles( - FullPhysChem = results.DR, - Sites = sites.DR, - Projects = projects.DR + + # Check if any sites are within the aoi + if ( (nrow(bbox_sites) > 0 ) == FALSE) { + stop("No monitoring sites were returned within your area of interest (no data available).") + } + + # Reformat returned info as sf + bbox_sites_sf <- TADA_MakeSpatial(bbox_sites, crs = 4326) + + # Subset sites to only within shapefile and get IDs + clipped_sites_sf <- bbox_sites_sf[aoi_sf, ] + + clipped_site_ids <- clipped_sites_sf$MonitoringLocationIdentifier + + # Check number of sites returned. More than 300 will require a map() approach + if( length(clipped_site_ids) > 300 ) { + warning( + paste0( + "More than 300 sites are matched by the AOI and query terms. 
", + "If your AOI is a county, state, country, or HUC boundary it would be more efficient to provide a code instead of an sf object." + ) + ) + + # Split IDs into a list + id_cluster_list <- split(x = clipped_site_ids, + f = ceiling(seq_along(clipped_site_ids) / 300)) + + print("Downloading WQP query results. This may take some time depending upon the query size.") + + # List of query results + results.DR <- purrr::map( + .x = id_cluster_list, + .f = ~suppressMessages( + dataRetrieval::readWQPdata( + siteid = .x, + WQPquery, + dataProfile = "resultPhysChem", + ignore_attributes = TRUE + ) + ) %>% + # To allow row binds + dplyr::mutate(across(everything(), as.character)) + ) %>% + list_rbind() + + # Check if any results were returned + if ( (nrow(results.DR) > 0 ) == FALSE) { + print( + paste0( + "Returning empty results dataframe: ", + "Your WQP query returned no results (no data available). ", + "Try a different query. ", + "Removing some of your query filters OR broadening your search area may help." + ) + ) + TADAprofile.clean <- results.DR + } else { + + # Get site metadata + sites.DR <- clipped_sites_sf %>% + as_tibble() %>% + select(-geometry) + + # Get project metadata + projects.DR <- dataRetrieval::readWQPdata( + siteid = clipped_site_ids, + WQPquery, + ignore_attributes = TRUE, + service = "Project" + ) + + # Join results, sites, projects + TADAprofile <- TADA_JoinWQPProfiles( + FullPhysChem = results.DR, + Sites = sites.DR, + Projects = projects.DR + ) + + # need to specify this or throws error when trying to bind rows. + # Temporary fix for larger issue where data structure for all columns + # should be specified. + TADAprofile <- TADAprofile %>% dplyr::mutate( + across(everything(), as.character) + ) + + # run TADA_AutoClean function + if (applyautoclean == TRUE) { + print("Data successfully downloaded. 
Running TADA_AutoClean function.") + + TADAprofile.clean <- TADA_AutoClean(TADAprofile) + } else { + TADAprofile.clean <- TADAprofile + } + } + + return(TADAprofile.clean) + + # Less than 300 sites: + } else { + + # Retrieve all 3 profiles + print("Downloading WQP query results. This may take some time depending upon the query size.") + print(WQPquery) + + # Get results + results.DR <- dataRetrieval::readWQPdata( + siteid = clipped_site_ids, + WQPquery, + dataProfile = "resultPhysChem", + ignore_attributes = TRUE + ) + + # check if any results were returned + if ((nrow(results.DR) > 0) == FALSE) { + paste0( + "Returning empty results dataframe: ", + "Your WQP query returned no results (no data available). ", + "Try a different query. ", + "Removing some of your query filters OR broadening your search area may help." + ) + TADAprofile.clean <- results.DR + } else { + + # Get site metadata + sites.DR <- dataRetrieval::whatWQPsites(WQPquery) + + # Get project metadata + projects.DR <- dataRetrieval::readWQPdata(WQPquery, + ignore_attributes = TRUE, + service = "Project") + + # Join results, sites, projects + TADAprofile <- TADA_JoinWQPProfiles( + FullPhysChem = results.DR, + Sites = sites.DR, + Projects = projects.DR + ) + + # need to specify this or throws error when trying to bind rows. + # Temporary fix for larger issue where data structure for all columns + # should be specified. + TADAprofile <- TADAprofile %>% dplyr::mutate( + across(everything(), as.character) + ) + + # run TADA_AutoClean function + if (applyautoclean == TRUE) { + print("Data successfully downloaded. 
Running TADA_AutoClean function.") + + TADAprofile.clean <- TADA_AutoClean(TADAprofile) + } else { + TADAprofile.clean <- TADAprofile + } + } + + return(TADAprofile.clean) + + } + + # If no sf object provided: + } else { + # Set query parameters + WQPquery <- list() + + if (!"null" %in% statecode) { + load(system.file("extdata", "statecodes_df.Rdata", package = "EPATADA")) + statecode <- as.character(statecode) + statecodes_sub <- statecodes_df %>% dplyr::filter(STUSAB %in% statecode) + statecd <- paste0("US:", statecodes_sub$STATE) + if (nrow(statecodes_sub) == 0) { + stop("State code is not valid. Check FIPS state/territory abbreviations.") + } + if (length(statecode) >= 1) { + WQPquery <- c(WQPquery, statecode = list(statecd)) + } + } + + if (length(huc) > 1) { + WQPquery <- c(WQPquery, huc = list(huc)) + } else if (huc != "null") { + WQPquery <- c(WQPquery, huc = huc) + } + + if (length(startDate) > 1) { + if (is.na(suppressWarnings(lubridate::parse_date_time(startDate[1], orders = "ymd")))) { + stop("Incorrect date format. Please use the format YYYY-MM-DD.") + } + WQPquery <- c(WQPquery, startDate = list(startDate)) + } else if (startDate != "null") { + if (is.na(suppressWarnings(lubridate::parse_date_time(startDate, orders = "ymd")))) { + stop("Incorrect date format. 
Please use the format YYYY-MM-DD.") + } + WQPquery <- c(WQPquery, startDate = startDate) + } + + if (length(countrycode) > 1) { + WQPquery <- c(WQPquery, countrycode = list(countrycode)) + } else if (countrycode != "null") { + WQPquery <- c(WQPquery, countrycode = countrycode) + } + + if (length(countycode) > 1) { + WQPquery <- c(WQPquery, countycode = list(countycode)) + } else if (countycode != "null") { + WQPquery <- c(WQPquery, countycode = countycode) + } + + if (length(siteid) > 1) { + WQPquery <- c(WQPquery, siteid = list(siteid)) + } else if (siteid != "null") { + WQPquery <- c(WQPquery, siteid = siteid) + } + + if (length(siteType) > 1) { + WQPquery <- c(WQPquery, siteType = list(siteType)) + } else if (siteType != "null") { + WQPquery <- c(WQPquery, siteType = siteType) + } + + if (length(characteristicName) > 1) { + WQPquery <- c(WQPquery, characteristicName = list(characteristicName)) + } else if (characteristicName != "null") { + WQPquery <- c(WQPquery, characteristicName = characteristicName) + } + + if (length(characteristicType) > 1) { + WQPquery <- c(WQPquery, characteristicType = list(characteristicType)) + } else if (characteristicType != "null") { + WQPquery <- c(WQPquery, characteristicType = characteristicType) + } + + if (length(sampleMedia) > 1) { + WQPquery <- c(WQPquery, sampleMedia = list(sampleMedia)) + } else if (sampleMedia != "null") { + WQPquery <- c(WQPquery, sampleMedia = sampleMedia) + } + + if (length(project) > 1) { + WQPquery <- c(WQPquery, project = list(project)) + } else if (project != "null") { + WQPquery <- c(WQPquery, project = project) + } + + if (length(providers) > 1) { + WQPquery <- c(WQPquery, providers = list(providers)) + } else if (providers != "null") { + WQPquery <- c(WQPquery, providers = providers) + } + + if (length(organization) > 1) { + WQPquery <- c(WQPquery, organization = list(organization)) + } else if (organization != "null") { + WQPquery <- c(WQPquery, organization = organization) + } + + if 
(length(endDate) > 1) { + if (is.na(suppressWarnings(lubridate::parse_date_time(endDate[1], orders = "ymd")))) { + stop("Incorrect date format. Please use the format YYYY-MM-DD.") + } + WQPquery <- c(WQPquery, endDate = list(endDate)) + } else if (endDate != "null") { + if (is.na(suppressWarnings(lubridate::parse_date_time(endDate, orders = "ymd")))) { + stop("Incorrect date format. Please use the format YYYY-MM-DD.") + } + WQPquery <- c(WQPquery, endDate = endDate) + } + + # Retrieve all 3 profiles + print("Downloading WQP query results. This may take some time depending upon the query size.") + print(WQPquery) + results.DR <- dataRetrieval::readWQPdata(WQPquery, + dataProfile = "resultPhysChem", + ignore_attributes = TRUE ) - - # need to specify this or throws error when trying to bind rows. Temporary fix for larger - # issue where data structure for all columns should be specified. - cols <- names(TADAprofile) - - TADAprofile <- TADAprofile %>% dplyr::mutate_at(cols, as.character) - - # run TADA_AutoClean function - if (applyautoclean == TRUE) { - print("Data successfully downloaded. Running TADA_AutoClean function.") - - TADAprofile.clean <- TADA_AutoClean(TADAprofile) + # check if any results are available + if ((nrow(results.DR) > 0) == FALSE) { + print("Returning empty results dataframe: Your WQP query returned no results (no data available). Try a different query. Removing some of your query filters OR broadening your search area may help.") + TADAprofile.clean <- results.DR } else { - TADAprofile.clean <- TADAprofile + sites.DR <- dataRetrieval::whatWQPsites(WQPquery) + + projects.DR <- dataRetrieval::readWQPdata(WQPquery, + ignore_attributes = TRUE, + service = "Project" + ) + + TADAprofile <- TADA_JoinWQPProfiles( + FullPhysChem = results.DR, + Sites = sites.DR, + Projects = projects.DR + ) + + # need to specify this or throws error when trying to bind rows. Temporary fix for larger + # issue where data structure for all columns should be specified. 
+ cols <- names(TADAprofile) + + TADAprofile <- TADAprofile %>% dplyr::mutate_at(cols, as.character) + + # run TADA_AutoClean function + if (applyautoclean == TRUE) { + print("Data successfully downloaded. Running TADA_AutoClean function.") + + TADAprofile.clean <- TADA_AutoClean(TADAprofile) + } else { + TADAprofile.clean <- TADAprofile + } } + + return(TADAprofile.clean) } - - return(TADAprofile.clean) + } - #' Read in WQP data using the Water Quality Portal (WQP) web services #' #' Go to the WQP website (https://www.waterqualitydata.us/) and fill out the @@ -379,25 +776,25 @@ TADA_ReadWQPWebServices <- function(webservice) { # read in csv from WQP web service if (grepl("zip=yes", webservice)) { webservice <- stringr::str_replace(webservice, "zip=yes", "zip=no") - + # download data webservice <- data.table::fread(toString(webservice)) - + # if input df was not downloaded using USGS's dataRetrieval, then the # column names will include / separators instead of . and TADA uses . # (e.g. ResultMeasure/MeasureUnitCode vs. ResultMeasure.MeasureUnitCode) colnames(webservice) <- gsub("/", ".", colnames(webservice)) - + return(webservice) } else { # download data webservice <- data.table::fread(toString(webservice)) - + # if input df was not downloaded using USGS's dataRetrieval, then the # column names will include / separators instead of . and TADA uses . # (e.g. ResultMeasure/MeasureUnitCode vs. 
ResultMeasure.MeasureUnitCode) colnames(webservice) <- gsub("/", ".", colnames(webservice)) - + return(webservice) } } @@ -487,11 +884,11 @@ TADA_BigDataRetrieval <- function(startDate = "null", maxrecs = 250000, applyautoclean = FALSE) { start_T <- Sys.time() - + if (!"null" %in% statecode & !"null" %in% huc) { stop("Please provide either state code(s) OR huc(s) to proceed.") } - + if (!startDate == "null") { startDat <- lubridate::ymd(startDate) startYearLo <- lubridate::year(startDat) @@ -500,7 +897,7 @@ TADA_BigDataRetrieval <- function(startDate = "null", startDat <- lubridate::ymd(startDate) startYearLo <- lubridate::year(startDat) } - + # Logic: if the input endDate is not null, convert to date and obtain year # for summary if (!endDate == "null") { @@ -511,7 +908,7 @@ TADA_BigDataRetrieval <- function(startDate = "null", endDat <- lubridate::ymd(endDate) endYearHi <- lubridate::year(endDat) } - + # Create readWQPsummary query WQPquery <- list() if (length(characteristicName) > 1) { @@ -529,7 +926,7 @@ TADA_BigDataRetrieval <- function(startDate = "null", } else if (siteType != "null") { WQPquery <- c(WQPquery, siteType = siteType) } - + if (!"null" %in% statecode) { load(system.file("extdata", "statecodes_df.Rdata", package = "EPATADA")) statecode <- as.character(statecode) @@ -547,45 +944,45 @@ TADA_BigDataRetrieval <- function(startDate = "null", WQPquery <- c(WQPquery, statecode = statecd) } } - + if (length(huc) > 1) { WQPquery <- c(WQPquery, huc = list(huc)) } else if (huc != "null") { WQPquery <- c(WQPquery, huc = huc) } - + if (length(countrycode) > 1) { WQPquery <- c(WQPquery, countrycode = list(countrycode)) } else if (countrycode != "null") { WQPquery <- c(WQPquery, countrycode = countrycode) } - + if (length(countycode) > 1) { WQPquery <- c(WQPquery, countycode = list(countycode)) } else if (countycode != "null") { WQPquery <- c(WQPquery, countycode = countycode) } - + if (length(organization) > 1) { WQPquery <- c(WQPquery, organization = 
list(organization)) } else if (organization != "null") { WQPquery <- c(WQPquery, organization = organization) } - + # cut down on summary query time if possible based on big data query diffdat <- lubridate::time_length(difftime(Sys.Date(), startDat), "years") - + if (diffdat <= 1) { WQPquery <- c(WQPquery, summaryYears = 1) } - + if (diffdat > 1 & diffdat <= 5) { WQPquery <- c(WQPquery, summaryYears = 5) } - + print("Building site summary table for chunking result downloads...") df_summary <- dataRetrieval::readWQPsummary(WQPquery) - + ## NOTE: if query brings back no results, function returns empty # dataRetrieval profile, not empty summary if (nrow(df_summary) > 0) { @@ -595,7 +992,7 @@ TADA_BigDataRetrieval <- function(startDate = "null", YearSummarized >= startYearLo, YearSummarized <= endYearHi ) - + rm(df_summary) # if there are still site records when filtered to years of interest.... if (dim(sites)[1] > 0) { @@ -621,15 +1018,15 @@ TADA_BigDataRetrieval <- function(startDate = "null", i <- i + 1 groupings <- plyr::rbind.fill(groupings, group) } - + x$group <- i - + groupings <- plyr::rbind.fill(groupings, x) } return(groupings) } - - + + # get total number of results per site and separate out sites with >250000 results tot_sites <- sites %>% dplyr::group_by(MonitoringLocationIdentifier) %>% @@ -637,14 +1034,14 @@ TADA_BigDataRetrieval <- function(startDate = "null", dplyr::arrange(tot_n) smallsites <- tot_sites %>% dplyr::filter(tot_n <= maxrecs) bigsites <- tot_sites %>% dplyr::filter(tot_n > maxrecs) - + df <- data.frame() - + if (dim(smallsites)[1] > 0) { smallsitesgrp <- make_groups(smallsites, maxrecs) - + print(paste0("Downloading data from sites with fewer than ", maxrecs, " results by grouping them together.")) - + for (i in 1:max(smallsitesgrp$group)) { site_chunk <- subset(smallsitesgrp$MonitoringLocationIdentifier, smallsitesgrp$group == i) joins <- TADA_DataRetrieval( @@ -660,29 +1057,29 @@ TADA_BigDataRetrieval <- function(startDate = "null", df 
<- dplyr::bind_rows(df, joins) } } - + rm(smallsites, smallsitesgrp) } - + if (dim(bigsites)[1] > 0) { print(paste0("Downloading data from sites with greater than ", maxrecs, " results, chunking queries by shorter time intervals...")) - + bsitesvec <- unique(bigsites$MonitoringLocationIdentifier) - + for (i in 1:length(bsitesvec)) { mlidsum <- subset(sites, sites$MonitoringLocationIdentifier == bsitesvec[i]) mlidsum <- mlidsum %>% dplyr::group_by(MonitoringLocationIdentifier, YearSummarized) %>% dplyr::summarise(tot_n = sum(ResultCount)) site_chunk <- unique(mlidsum$MonitoringLocationIdentifier) - + bigsitegrps <- make_groups(mlidsum, maxrecs) - + for (i in 1:max(bigsitegrps$group)) { yearchunk <- subset(bigsitegrps$YearSummarized, bigsitegrps$group == i) startD <- paste0(min(yearchunk), "-01-01") endD <- paste0(max(yearchunk), "-12-31") - + joins <- TADA_DataRetrieval( startDate = startD, endDate = endD, @@ -692,7 +1089,7 @@ TADA_BigDataRetrieval <- function(startDate = "null", sampleMedia = sampleMedia, applyautoclean = FALSE ) - + if (dim(joins)[1] > 0) { df <- dplyr::bind_rows(df, joins) } @@ -708,18 +1105,18 @@ TADA_BigDataRetrieval <- function(startDate = "null", warning("Query returned no data. Function returns an empty dataframe.") return(df_summary) } - + df <- subset(df, as.Date(df$ActivityStartDate, "%Y-%m-%d") >= startDat & as.Date(df$ActivityStartDate, "%Y-%m-%d") <= endDat) - + if (applyautoclean == TRUE) { print("Applying TADA_AutoClean function...") df <- TADA_AutoClean(df) } - + # timing function for efficiency tests. 
difference <- difftime(Sys.time(), start_T, units = "mins") print(difference) - + return(df) } @@ -756,20 +1153,20 @@ TADA_JoinWQPProfiles <- function(FullPhysChem = "null", Sites = "null", Projects = "null") { FullPhysChem.df <- FullPhysChem - + Sites.df <- Sites - + Projects.df <- Projects - + # Join station data to full phys/chem (FullPhysChem.df) if (length(Sites.df > 1)) { if (nrow(Sites.df) > 0) { join1 <- FullPhysChem.df %>% # join stations to results dplyr::left_join(Sites.df, - by = "MonitoringLocationIdentifier", - multiple = "all", - relationship = "many-to-many" + by = "MonitoringLocationIdentifier", + multiple = "all", + relationship = "many-to-many" ) %>% # remove ".x" suffix from column names dplyr::rename_at(dplyr::vars(dplyr::ends_with(".x")), ~ stringr::str_replace(., "\\..$", "")) %>% @@ -781,8 +1178,8 @@ TADA_JoinWQPProfiles <- function(FullPhysChem = "null", } else { join1 <- FullPhysChem.df } - - + + # Add QAPP columns from project if (length(Projects.df) > 1) { if (nrow(Projects.df) > 0) { @@ -808,6 +1205,6 @@ TADA_JoinWQPProfiles <- function(FullPhysChem = "null", } else { join2 <- join1 } - + return(join2) } From b268ce8cb602cec8e561d94caf28ad771f4c2e8e Mon Sep 17 00:00:00 2001 From: Matt Brousil <37380883+mbrousil@users.noreply.github.com> Date: Mon, 26 Aug 2024 10:38:10 -0700 Subject: [PATCH 02/35] Add TADA_TribalOptions to R/GeospatialFunctions.R --- R/GeospatialFunctions.R | 227 +++++++++++++++++++++++++++------------- 1 file changed, 156 insertions(+), 71 deletions(-) diff --git a/R/GeospatialFunctions.R b/R/GeospatialFunctions.R index f4a9e48f..d95875db 100644 --- a/R/GeospatialFunctions.R +++ b/R/GeospatialFunctions.R @@ -29,13 +29,13 @@ #' TADA_MakeSpatial <- function(.data, crs = 4326) { if (!"LongitudeMeasure" %in% colnames(.data) | - !"LatitudeMeasure" %in% colnames(.data) | - !"HorizontalCoordinateReferenceSystemDatumName" %in% colnames(.data)) { + !"LatitudeMeasure" %in% colnames(.data) | + 
!"HorizontalCoordinateReferenceSystemDatumName" %in% colnames(.data)) { stop("The dataframe does not contain WQP-style latitude and longitude data (column names `HorizontalCoordinateReferenceSystemDatumName`, `LatitudeMeasure`, and `LongitudeMeasure`.") } else if (!is.null(.data) & inherits(.data, "sf")) { stop("Your data is already a spatial object.") } - + suppressMessages(suppressWarnings({ # Make a reference table for CRS and EPSG codes # List should include all codes in WQX domain (see HorizontalCoordinateReferenceSystemDatum CSV at https://www.epa.gov/waterdata/storage-and-retrieval-and-water-quality-exchange-domain-services-and-downloads) @@ -59,7 +59,7 @@ TADA_MakeSpatial <- function(.data, crs = 4326) { "WGS72", 6322, "HARN", 4152 ) - + # join our CRS reference table to our original WQP dataframe: sf <- .data %>% tibble::rowid_to_column(var = "index") %>% @@ -89,7 +89,7 @@ TADA_MakeSpatial <- function(.data, crs = 4326) { dplyr::arrange(index) %>% dplyr::select(-c(index, epsg)) })) - + return(sf) } @@ -120,10 +120,10 @@ fetchATTAINS <- function(.data) { if (is.null(.data) | nrow(.data) == 0) { stop("There is no data in your `data` object to use as a bounding box for selecting ATTAINS features.") } - + # EPSG we want our ATTAINS data to be in (always 4326 for this function) our_epsg <- 4326 - + # If data is already spatial, just make sure it is in the right CRS # and add an index as the WQP observations' unique identifier... if (!is.null(.data) & inherits(.data, "sf")) { @@ -139,7 +139,7 @@ fetchATTAINS <- function(.data) { # convert dataframe to a spatial object TADA_MakeSpatial(.data = ., crs = our_epsg) } - + baseurls <- c( # ATTAINS catchments: "https://gispub.epa.gov/arcgis/rest/services/OW/ATTAINS_Assessment/MapServer/3/query?", # ATTAINS points: @@ -149,13 +149,13 @@ fetchATTAINS <- function(.data) { # ATTAINS polygons: "https://gispub.epa.gov/arcgis/rest/services/OW/ATTAINS_Assessment/MapServer/2/query?" 
) - + feature_downloader <- function(baseurls) { # starting at feature 1 (i.e., no offset): offset <- 0 # empty list to store all features in all_features <- list() - + # bounding box (with some minor wiggle) of user's WQP data suppressMessages(suppressWarnings({ bbox <- .data %>% @@ -165,12 +165,12 @@ fetchATTAINS <- function(.data) { # encode for use within the API URL urltools::url_encode(.) })) - + # The ATTAINS API has a limit of 2000 features that can be pulled in at once. # Therefore, we must split the call into manageable "chunks" using a moving # window of what features to pull in, then munging all the separate API calls # together. - + repeat { query <- urltools::param_set(baseurls, key = "geometry", value = bbox) %>% urltools::param_set(key = "inSR", value = our_epsg) %>% @@ -191,7 +191,7 @@ fetchATTAINS <- function(.data) { urltools::param_set(key = "returnDistinctValues", value = "false") %>% urltools::param_set(key = "returnExtentOnly", value = "false") %>% urltools::param_set(key = "featureEncoding", value = "esriDefault") - + # Fetch features within the offset window and append to list: features <- suppressMessages(suppressWarnings({ tryCatch( @@ -203,30 +203,30 @@ fetchATTAINS <- function(.data) { } ) })) - - + + # Exit loop if no more features or error occurred if (is.null(features) || nrow(features) == 0) { break } - + all_features <- c(all_features, list(features)) # once done, change offset by 2000 features: offset <- offset + 2000 - + if (offset == 4000) { print("Your TADA data covers a large spatial range. 
The ATTAINS pull may take a while.") } } - + all_features <- dplyr::bind_rows(all_features) } - + final_features <- baseurls %>% purrr::map(~ feature_downloader(.)) - + names(final_features) <- c("ATTAINS_catchments", "ATTAINS_points", "ATTAINS_lines", "ATTAINS_polygons") - + return(final_features) } @@ -276,16 +276,16 @@ TADA_GetATTAINS <- function(.data, return_sf = TRUE) { "ATTAINS.visionpriority303d", "ATTAINS.areasqkm", "ATTAINS.catchmentareasqkm", "ATTAINS.catchmentstatecode", "ATTAINS.catchmentresolution", "ATTAINS.Shape_Area" ) - + if (any(attains_names %in% colnames(.data))) { stop("Your data has already been joined with ATTAINS data.") } - + if (nrow(.data) == 0) { print("Your Water Quality Portal dataframe has no observations. Returning an empty dataframe with empty ATTAINS features.") - + # if no WQP observations, return a modified `data` with empty ATTAINS-related columns: - + col_val_list <- stats::setNames( object = rep( x = list(NA), @@ -293,19 +293,19 @@ TADA_GetATTAINS <- function(.data, return_sf = TRUE) { ), nm = attains_names ) - + # Add ATTAINS columns with NA values no_WQP_data <- .data %>% dplyr::mutate(index = NA) %>% dplyr::bind_cols(col_val_list) - + # In this case we'll need to return empty ATTAINS objects if (return_sf == TRUE) { ATTAINS_catchments <- NULL ATTAINS_lines <- NULL ATTAINS_points <- NULL ATTAINS_polygons <- NULL - + return(list( "TADA_with_ATTAINS" = no_WQP_data, "ATTAINS_catchments" = ATTAINS_catchments, @@ -318,14 +318,14 @@ TADA_GetATTAINS <- function(.data, return_sf = TRUE) { return(no_WQP_data) } } - + # If data doesn't already contain ATTAINS data and isn't an empty dataframe: suppressMessages(suppressWarnings({ sf::sf_use_s2(FALSE) - + # If data is already spatial, just make sure it is in the right CRS # and add unique WQP ID for identifying obs with more than one ATTAINS assessment unit - + if (!is.null(.data) & inherits(.data, "sf")) { if (sf::st_crs(.data)$epsg != 4326) { TADA_DataRetrieval_data <- .data %>% @@ 
-344,9 +344,9 @@ TADA_GetATTAINS <- function(.data, return_sf = TRUE) { tibble::rowid_to_column(var = "index") } })) - + attains_features <- try(fetchATTAINS(.data = TADA_DataRetrieval_data), silent = TRUE) - + suppressMessages(suppressWarnings({ # grab the ATTAINS catchments within our WQP bbox: nearby_catchments <- NULL @@ -364,11 +364,11 @@ TADA_GetATTAINS <- function(.data, return_sf = TRUE) { silent = TRUE ) })) - + # if no ATTAINS data, return original dataframe with empty ATTAINS columns: if (is.null(nearby_catchments)) { print("There are no ATTAINS features associated with these WQP observations. Returning original dataframe with empty ATTAINS columns and empty ATTAINS geospatial features.") - + col_val_list <- stats::setNames( object = rep( x = list(NA), @@ -376,18 +376,18 @@ TADA_GetATTAINS <- function(.data, return_sf = TRUE) { ), nm = attains_names ) - + # return a modified `.data` with empty ATTAINS-related columns: no_ATTAINS_data <- .data %>% dplyr::bind_cols(col_val_list) %>% tibble::rowid_to_column(var = "index") - + if (return_sf == TRUE) { ATTAINS_catchments <- NULL ATTAINS_lines <- NULL ATTAINS_points <- NULL ATTAINS_polygons <- NULL - + return(list( "TADA_with_ATTAINS" = no_ATTAINS_data, "ATTAINS_catchments" = ATTAINS_catchments, @@ -398,7 +398,7 @@ TADA_GetATTAINS <- function(.data, return_sf = TRUE) { } else { return(no_ATTAINS_data) } - + # If there IS ATTAINS data... 
} else { suppressMessages(suppressWarnings({ @@ -406,18 +406,18 @@ TADA_GetATTAINS <- function(.data, return_sf = TRUE) { TADA_with_ATTAINS <- TADA_DataRetrieval_data %>% # left join = TRUE to preserve all observations (with or without ATTAINS features): sf::st_join(., nearby_catchments, left = TRUE) - + if (return_sf == FALSE) { return(TADA_with_ATTAINS) } - + # CATCHMENT FEATURES # use original catchment pull, but return column names to original ATTAINS_catchments <- nearby_catchments colnames(ATTAINS_catchments) <- gsub("ATTAINS.", "", colnames(ATTAINS_catchments)) # due to the rename, must re-set geometry column: sf::st_geometry(ATTAINS_catchments) <- "geometry" - + # POINT FEATURES - try to pull point AU data if it exists. Otherwise, move on... ATTAINS_points <- NULL try( @@ -428,7 +428,7 @@ TADA_GetATTAINS <- function(.data, return_sf = TRUE) { dplyr::distinct(assessmentunitidentifier, .keep_all = TRUE), silent = TRUE ) - + # LINE FEATURES - try to pull line AU data if it exists. Otherwise, move on... ATTAINS_lines <- NULL try( @@ -439,7 +439,7 @@ TADA_GetATTAINS <- function(.data, return_sf = TRUE) { dplyr::distinct(assessmentunitidentifier, .keep_all = TRUE), silent = TRUE ) - + # POLYGON FEATURES - try to pull polygon AU data if it exists. Otherwise, move on... ATTAINS_polygons <- NULL try( @@ -451,7 +451,7 @@ TADA_GetATTAINS <- function(.data, return_sf = TRUE) { silent = TRUE ) })) - + return(list( "TADA_with_ATTAINS" = TADA_with_ATTAINS, "ATTAINS_catchments" = ATTAINS_catchments, @@ -504,17 +504,17 @@ TADA_ViewATTAINS <- function(.data) { ) %in% names(.data))) { stop("Your input dataframe was not produced from `TADA_GetATTAINS()` or it was modified. 
Please create your list of ATTAINS features using `TADA_GetATTAINS()` and confirm that return_sf has been set to TRUE.") } - + ATTAINS_table <- .data[["TADA_with_ATTAINS"]] ATTAINS_catchments <- .data[["ATTAINS_catchments"]] ATTAINS_points <- .data[["ATTAINS_points"]] ATTAINS_lines <- .data[["ATTAINS_lines"]] ATTAINS_polygons <- .data[["ATTAINS_polygons"]] - + if (nrow(ATTAINS_table) == 0) { stop("Your WQP dataframe has no observations.") } - + required_columns <- c( "LongitudeMeasure", "LatitudeMeasure", "HorizontalCoordinateReferenceSystemDatumName", @@ -522,27 +522,27 @@ TADA_ViewATTAINS <- function(.data) { "MonitoringLocationName", "ResultIdentifier", "ActivityStartDate", "OrganizationIdentifier" ) - + if (!any(required_columns %in% colnames(ATTAINS_table))) { stop("Your dataframe does not contain the necessary WQP-style column names.") } - + suppressMessages(suppressWarnings({ sf::sf_use_s2(FALSE) - + # if data was spatial, remove for downstream leaflet dev: try(ATTAINS_table <- ATTAINS_table %>% - sf::st_drop_geometry(), silent = TRUE) - + sf::st_drop_geometry(), silent = TRUE) + tada.pal <- TADA_ColorPalette() - + colors <- data.frame( overallstatus = c("Not Supporting", "Fully Supporting", "Not Assessed"), col = c(tada.pal[3], tada.pal[4], tada.pal[7]), dark_col = c(tada.pal[12], tada.pal[6], tada.pal[11]), priority = c(1, 2, 3) ) - + # POINT FEATURES - try to pull point AU data if it exists. Otherwise, move on... try( points_mapper <- ATTAINS_points %>% @@ -554,7 +554,7 @@ TADA_ViewATTAINS <- function(.data) { dplyr::right_join(., tibble::as_tibble(sf::st_coordinates(ATTAINS_points)), by = c("index" = "L1")), silent = TRUE ) - + # LINE FEATURES - try to pull line AU data if it exists. Otherwise, move on... try( lines_mapper <- ATTAINS_lines %>% @@ -562,7 +562,7 @@ TADA_ViewATTAINS <- function(.data) { dplyr::mutate(type = "Line Feature"), silent = TRUE ) - + # POLYGON FEATURES - try to pull polygon AU data if it exists. Otherwise, move on... 
try( polygons_mapper <- ATTAINS_polygons %>% @@ -570,7 +570,7 @@ TADA_ViewATTAINS <- function(.data) { dplyr::mutate(type = "Polygon Feature"), silent = TRUE ) - + # Develop WQP site stats (e.g. count of observations, parameters, per site) sumdat <- ATTAINS_table %>% dplyr::group_by(MonitoringLocationIdentifier, MonitoringLocationName, LatitudeMeasure, LongitudeMeasure) %>% @@ -586,15 +586,15 @@ TADA_ViewATTAINS <- function(.data) { LatitudeMeasure = as.numeric(LatitudeMeasure), LongitudeMeasure = as.numeric(LongitudeMeasure) ) - + # Basemap for AOI: map <- leaflet::leaflet() %>% leaflet::addProviderTiles("Esri.WorldTopoMap", - group = "World topo", - options = leaflet::providerTileOptions( - updateWhenZooming = FALSE, - updateWhenIdle = TRUE - ) + group = "World topo", + options = leaflet::providerTileOptions( + updateWhenZooming = FALSE, + updateWhenIdle = TRUE + ) ) %>% leaflet::clearShapes() %>% leaflet::fitBounds( @@ -614,7 +614,7 @@ TADA_ViewATTAINS <- function(.data) { opacity = 1, title = "Legend" ) - + # Add ATTAINS catchment outlines (if they exist): try( map <- map %>% @@ -626,7 +626,7 @@ TADA_ViewATTAINS <- function(.data) { ), silent = TRUE ) - + # Add ATTAINS polygon features (if they exist): try( map <- map %>% @@ -645,7 +645,7 @@ TADA_ViewATTAINS <- function(.data) { ), silent = TRUE ) - + # Add ATTAINS lines features (if they exist): try( map <- map %>% @@ -663,7 +663,7 @@ TADA_ViewATTAINS <- function(.data) { ), silent = TRUE ) - + # Add ATTAINS point features (if they exist): try( map <- map %>% @@ -682,7 +682,7 @@ TADA_ViewATTAINS <- function(.data) { ), silent = TRUE ) - + # Add WQP observation features (should always exist): try( map <- map %>% @@ -702,12 +702,97 @@ TADA_ViewATTAINS <- function(.data) { ), silent = TRUE ) - + if (is.null(ATTAINS_lines) & is.null(ATTAINS_points) & is.null(ATTAINS_polygons)) { print("No ATTAINS data associated with this Water Quality Portal data.") } - + # Return leaflet map of TADA WQ and its associated ATTAINS 
data return(map) })) } + + +#' Access options available for querying tribal spatial data with `TADA_DataRetrieval()`. +#' +#' @description +#' This function provides access to [six layer datasets](https://geopub.epa.gov/arcgis/rest/services/EMEF/Tribal/MapServer) +#' containing spatial data related to tribal lands: "Alaska Native Allotments", +#' "Alaska Native Villages", "American Indian Reservations", "Off-reservation Trust Lands", +#' "Oklahoma Tribal Statistical Areas", and "Virginia Federally Recognized Tribes". +#' These datasets are used by `TADA_DataRetrieval()` when retrieving spatial data +#' for tribal lands specified by the user. +#' +#' The purpose of `TADA_TribalOptions()` is to allow the user to review the available +#' data in those datasets and identify the records they would like to query with +#' `TADA_DataRetrieval()`. +#' +#' An interactive map of the six layer datasets is available on ArcGIS Online Map +#' Viewer here: https://www.arcgis.com/apps/mapviewer/index.html?url=https://geopub.epa.gov/arcgis/rest/services/EMEF/Tribal/MapServer&source=sd +#' +#' @param tribal_area_type A character string. Must be one of the six tribal +#' spatial layers: "Alaska Native Allotments", "Alaska Native Villages", +#' "American Indian Reservations", "Off-reservation Trust Lands", +#' "Oklahoma Tribal Statistical Areas", or "Virginia Federally Recognized Tribes". +#' +#' @param return_sf Logical. Should the function return the dataset as an `sf` +#' object (TRUE) or a data frame (FALSE)? Defaults to FALSE. +#' +#' @returns A data frame or `sf` object containing the specified layer from the EPA +#' Map Service. +#' +#' @note +#' Alaska Native Villages and Virginia Federally Recognized Tribes are point +#' geometries in the Map Service, not polygons. At the time of this writing they +#' do not return any data when used for WQP bbox queries. 
+#' +#' @seealso [TADA_DataRetrieval()] +#' + +TADA_TribalOptions <- function(tribal_area_type, return_sf = FALSE){ + + # Make a reference table for tribal area type + url matching + map_service_urls <- tibble::tribble( + ~tribal_area, ~url, + "Alaska Native Allotments", "https://geopub.epa.gov/arcgis/rest/services/EMEF/Tribal/MapServer/0", + "Alaska Native Villages", "https://geopub.epa.gov/arcgis/rest/services/EMEF/Tribal/MapServer/1", + "American Indian Reservations", "https://geopub.epa.gov/arcgis/rest/services/EMEF/Tribal/MapServer/2", + "Off-reservation Trust Lands", "https://geopub.epa.gov/arcgis/rest/services/EMEF/Tribal/MapServer/3", + "Oklahoma Tribal Statistical Areas", "https://geopub.epa.gov/arcgis/rest/services/EMEF/Tribal/MapServer/4", + "Virginia Federally Recognized Tribes", "https://geopub.epa.gov/arcgis/rest/services/EMEF/Tribal/MapServer/5" + ) + + # Confirm usable string provided + if( !(tribal_area_type %in% map_service_urls$tribal_area) ){ + stop("tribal_area_type must match one of the six tribal spatial layer names.") + } + + if( tribal_area_type %in% + c("Alaska Native Villages", "Virginia Federally Recognized Tribes") ){ + warning( + paste0( + "Alaska Native Villages and Virginia Federally Recognized Tribes are point geometries in the Map Service, not polygons. ", + "At the time of this writing they do not return any data when used for WQP bbox queries." 
+ ) + ) + } + + # Query Map Service + tribal_area_sf <- dplyr::filter(map_service_urls, + tribal_area == tribal_area_type)$url %>% + arcgislayers::arc_open() %>% + # Return sf + arcgislayers::arc_select() %>% + sf::st_make_valid() + + # Convert to df if needed, export + if(return_sf == FALSE){ + return( + as.data.frame(tribal_area_sf) %>% + sf::st_drop_geometry() + ) + } else { + return(tribal_area_sf) + } + +} From 49ddc839c3c821c0f3841b3b27210a2c85733735 Mon Sep 17 00:00:00 2001 From: Matt Brousil <37380883+mbrousil@users.noreply.github.com> Date: Wed, 20 Nov 2024 12:39:28 -0800 Subject: [PATCH 03/35] tribal options edits --- NAMESPACE | 1 + R/DataDiscoveryRetrieval.R | 68 ++++++++++++++++++++------------------ R/GeospatialFunctions.R | 2 ++ man/TADA_DataRetrieval.Rd | 24 +++++++++++++- man/TADA_TribalOptions.Rd | 44 ++++++++++++++++++++++++ 5 files changed, 105 insertions(+), 34 deletions(-) create mode 100644 man/TADA_TribalOptions.Rd diff --git a/NAMESPACE b/NAMESPACE index 9bd8f6cb..0b286c45 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -74,6 +74,7 @@ export(TADA_SimpleCensoredMethods) export(TADA_Stats) export(TADA_SubstituteDeprecatedChars) export(TADA_SummarizeColumn) +export(TADA_TribalOptions) export(TADA_TwoCharacteristicScatterplot) export(TADA_UniqueCharUnitSpeciation) export(TADA_ViewATTAINS) diff --git a/R/DataDiscoveryRetrieval.R b/R/DataDiscoveryRetrieval.R index 8bae1989..a5f2ff59 100644 --- a/R/DataDiscoveryRetrieval.R +++ b/R/DataDiscoveryRetrieval.R @@ -247,17 +247,13 @@ TADA_DataRetrieval <- function(startDate = "null", stop("A tribal_area_type is required if tribe_name_parcel is provided.") } - # Set query parameters WQPquery <- list() - # If an sf object OR tribal info are provided they will be the basis of the query # (The tribal data handling uses sf objects as well) if( (!is.null(aoi_sf) & inherits(aoi_sf, "sf")) | (tribal_area_type != "null") ){ - sf::sf_use_s2(FALSE) - # Build the non-sf part of the query: # StartDate @@ -399,6 +395,9 @@ 
TADA_DataRetrieval <- function(startDate = "null", } + # Check and/or fix geometry + aoi_sf <- sf::st_make_valid(aoi_sf) + # Match CRS if(sf::st_crs(aoi_sf) != 4326){ aoi_sf <- sf::st_transform(aoi_sf, crs = 4326) @@ -1000,35 +999,7 @@ TADA_BigDataRetrieval <- function(startDate = "null", rm(df_summary) # if there are still site records when filtered to years of interest.... if (dim(sites)[1] > 0) { - # function for chunking by records - make_groups <- function(x, maxrecs) { - if (sum(x$tot_n) <= maxrecs | dim(x)[1] == 1) { # WARNING: if there's only one row and it's more than maxrecs, it will try to run the query anyway - groupings <- x - groupings$group <- 1 - } else { - groupings <- data.frame() - group <- data.frame() - i <- 1 - while (nrow(x) > nrow(group)) { - x$csum <- cumsum(x$tot_n) - brk <- which(x$csum > maxrecs)[1] - group <- x[1:(brk - 1), ] - group$group <- i - if (brk > 1) { - x <- x[brk:length(x$tot_n), ] - } else { - x <- x[2:length(x$tot_n), ] - } - i <- i + 1 - groupings <- plyr::rbind.fill(groupings, group) - } - - x$group <- i - - groupings <- plyr::rbind.fill(groupings, x) - } - return(groupings) - } + # get total number of results per site and separate out sites with >250000 results @@ -1212,3 +1183,34 @@ TADA_JoinWQPProfiles <- function(FullPhysChem = "null", return(join2) } + + +# function for chunking by records +make_groups <- function(x, maxrecs) { + if (sum(x$tot_n) <= maxrecs | dim(x)[1] == 1) { # WARNING: if there's only one row and it's more than maxrecs, it will try to run the query anyway + groupings <- x + groupings$group <- 1 + } else { + groupings <- data.frame() + group <- data.frame() + i <- 1 + while (nrow(x) > nrow(group)) { + x$csum <- cumsum(x$tot_n) + brk <- which(x$csum > maxrecs)[1] + group <- x[1:(brk - 1), ] + group$group <- i + if (brk > 1) { + x <- x[brk:length(x$tot_n), ] + } else { + x <- x[2:length(x$tot_n), ] + } + i <- i + 1 + groupings <- plyr::rbind.fill(groupings, group) + } + + x$group <- i + + groupings <- 
plyr::rbind.fill(groupings, x) + } + return(groupings) +} diff --git a/R/GeospatialFunctions.R b/R/GeospatialFunctions.R index d95875db..8b6bb16b 100644 --- a/R/GeospatialFunctions.R +++ b/R/GeospatialFunctions.R @@ -748,6 +748,8 @@ TADA_ViewATTAINS <- function(.data) { #' #' @seealso [TADA_DataRetrieval()] #' +#' @export +#' TADA_TribalOptions <- function(tribal_area_type, return_sf = FALSE){ diff --git a/man/TADA_DataRetrieval.Rd b/man/TADA_DataRetrieval.Rd index dfd58fd8..e49f1e03 100644 --- a/man/TADA_DataRetrieval.Rd +++ b/man/TADA_DataRetrieval.Rd @@ -7,11 +7,14 @@ TADA_DataRetrieval( startDate = "null", endDate = "null", + aoi_sf = NULL, countrycode = "null", countycode = "null", huc = "null", siteid = "null", siteType = "null", + tribal_area_type = "null", + tribe_name_parcel = "null", characteristicName = "null", characteristicType = "null", sampleMedia = "null", @@ -27,6 +30,8 @@ TADA_DataRetrieval( \item{endDate}{End Date string in the format YYYY-MM-DD, for example, "2020-01-01"} +\item{aoi_sf}{An sf object to use for a query area of interest} + \item{countrycode}{Code that identifies a country or ocean (e.g. countrycode = "CA" for Canada, countrycode = "OA" for Atlantic Ocean). See https://www.waterqualitydata.us/Codes/countrycode for options.} \item{countycode}{FIPS county name. Note that a state code must also be supplied (e.g. statecode = "AL", countycode = "Chilton"). See https://www.waterqualitydata.us/Codes/countycode for options.} @@ -37,6 +42,10 @@ TADA_DataRetrieval( \item{siteType}{Type of waterbody. 
See https://www.waterqualitydata.us/Codes/sitetype for options.} +\item{tribal_area_type}{One of the six tribal spatial layers: "Alaska Native Allotments", "Alaska Native Villages", "American Indian Reservations", "Off-reservation Trust Lands", "Oklahoma Tribal Statistical Areas", or "Virginia Federally Recognized Tribes".} + +\item{tribe_name_parcel}{The name of a tribe corresponding to an entry in the TRIBE_NAME field of the specified tribal_area_type. OR if the type is Alaska Native Allotments" then the corresponding PARCEL_NO.} + \item{characteristicName}{Name of parameter. See https://www.waterqualitydata.us/Codes/characteristicName for options.} \item{characteristicType}{Groups of environmental measurements/parameters. See https://www.waterqualitydata.us/Codes/characteristicType for options.} @@ -62,7 +71,9 @@ dataframe. Note that the inputs (e.g. project, organization, siteType) with the exceptions of endDate and startDate match the web service call format from the online WQP GUI. endDate and startDate match the format suggested in USGS's dataRetrieval package (endDate = "YYYY-MM-DD"), which is a more familiar date -format for R users than the WQP GUI's endDateHi = "MM-DD-YYYY". +format for R users than the WQP GUI's endDateHi = "MM-DD-YYYY". aoi_sf, +tribal_area_type, and tribe_name_parcel do not have corresponding inputs in +the web service. } \details{ Multiple fields are queried together using AND logic, but multiple values within @@ -76,6 +87,11 @@ of the query fields. characteristicName and Characteristic Group also work as an AND, therefore the characteristicName must fall within the Characteristic Group when both are entered. +aoi_sf cannot be used with tribal_area_type. If countrycode, countycode, huc, +siteid, or statecode are used with aoi_sf or tribal_area_type they will be ignored +under the assumption that the sf object or tribal location are the intended +area of interest. 
+ Users can reference the \href{https://www.epa.gov/waterdata/storage-and-retrieval-and-water-quality-exchange-domain-services-and-downloads}{WQX domain tables} to find allowable values for queries, e.g., reference the WQX domain table to find countycode and statecode: https://cdx.epa.gov/wqx/download/DomainValues/County_CSV.zip Alternatively, you can use the WQP services to find areas where data is available in the US: https://www.waterqualitydata.us/Codes/countycode @@ -88,6 +104,12 @@ Note: TADA_DataRetrieval (by leveraging dataRetrieval), automatically converts the date times to UTC. It also automatically converts the data to dates, datetimes, numerics based on a standard algorithm. See: ?dataRetrieval::readWQPdata } +\note{ +Alaska Native Villages and Virginia Federally Recognized Tribes are point +geometries in the Map Service, not polygons. At the time of this writing they +do not return any data when used for WQP bbox queries and so are set to return +errors when used with this function. +} \examples{ \dontrun{ # example for WI diff --git a/man/TADA_TribalOptions.Rd b/man/TADA_TribalOptions.Rd new file mode 100644 index 00000000..05396d87 --- /dev/null +++ b/man/TADA_TribalOptions.Rd @@ -0,0 +1,44 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/GeospatialFunctions.R +\name{TADA_TribalOptions} +\alias{TADA_TribalOptions} +\title{Access options available for querying tribal spatial data with \code{TADA_DataRetrieval()}.} +\usage{ +TADA_TribalOptions(tribal_area_type, return_sf = FALSE) +} +\arguments{ +\item{tribal_area_type}{A character string. Must be one of the six tribal +spatial layers: "Alaska Native Allotments", "Alaska Native Villages", +"American Indian Reservations", "Off-reservation Trust Lands", +"Oklahoma Tribal Statistical Areas", or "Virginia Federally Recognized Tribes".} + +\item{return_sf}{Logical. Should the function return the dataset as an \code{sf} +object (TRUE) or a data frame (FALSE)? 
Defaults to FALSE.} +} +\value{ +A data frame or \code{sf} object containing the specified layer from the EPA +Map Service. +} +\description{ +This function provides access to \href{https://geopub.epa.gov/arcgis/rest/services/EMEF/Tribal/MapServer}{six layer datasets} +containing spatial data related to tribal lands: "Alaska Native Allotments", +"Alaska Native Villages", "American Indian Reservations", "Off-reservation Trust Lands", +"Oklahoma Tribal Statistical Areas", and "Virginia Federally Recognized Tribes". +These datasets are used by \code{TADA_DataRetrieval()} when retrieving spatial data +for tribal lands specified by the user. + +The purpose of \code{TADA_TribalOptions()} is to allow the user to review the available +data in those datasets and identify the records they would like to query with +\code{TADA_DataRetrieval()}. + +An interactive map of the six layer datasets is available on ArcGIS Online Map +Viewer here: https://www.arcgis.com/apps/mapviewer/index.html?url=https://geopub.epa.gov/arcgis/rest/services/EMEF/Tribal/MapServer&source=sd +} +\note{ +Alaska Native Villages and Virginia Federally Recognized Tribes are point +geometries in the Map Service, not polygons. At the time of this writing they +do not return any data when used for WQP bbox queries. 
+} +\seealso{ +\code{\link[=TADA_DataRetrieval]{TADA_DataRetrieval()}} +} From 57e5a036bb207bb1e251a90d425a017783e337a0 Mon Sep 17 00:00:00 2001 From: Matt Brousil <37380883+mbrousil@users.noreply.github.com> Date: Wed, 20 Nov 2024 12:47:15 -0800 Subject: [PATCH 04/35] TADA_DR rewrite --- R/DataDiscoveryRetrieval.R | 218 ++++++++++++++++++++++++------------- 1 file changed, 140 insertions(+), 78 deletions(-) diff --git a/R/DataDiscoveryRetrieval.R b/R/DataDiscoveryRetrieval.R index a5f2ff59..f689ae47 100644 --- a/R/DataDiscoveryRetrieval.R +++ b/R/DataDiscoveryRetrieval.R @@ -194,11 +194,10 @@ TADA_DataRetrieval <- function(startDate = "null", organization = "null", project = "null", providers = "null", + maxrecs = 250000, applyautoclean = TRUE) { - - - - # Check for incomplete or inconsistent inputs: + + # Check for incomplete or inconsistent inputs: # If both an sf object and tribe information are provided it's unclear what # the priority should be for the query @@ -210,7 +209,7 @@ TADA_DataRetrieval <- function(startDate = "null", "Please use only one of these query options." ) ) - } + } # Check for other arguments that indicate location. 
Function will ignore # these inputs but warn the user @@ -406,55 +405,59 @@ TADA_DataRetrieval <- function(startDate = "null", # Get bbox of the sf object input_bbox <- sf::st_bbox(aoi_sf) - # Query site info within the bbox - bbox_sites <- dataRetrieval::whatWQPsites( + # Query info on available data within the bbox + bbox_avail <- dataRetrieval::whatWQPdata( WQPquery, bBox = c(input_bbox$xmin, input_bbox$ymin, input_bbox$xmax, input_bbox$ymax) ) # Check if any sites are within the aoi - if ( (nrow(bbox_sites) > 0 ) == FALSE) { + if ( (nrow(bbox_avail) > 0 ) == FALSE) { stop("No monitoring sites were returned within your area of interest (no data available).") } # Reformat returned info as sf - bbox_sites_sf <- TADA_MakeSpatial(bbox_sites, crs = 4326) + bbox_sites_sf <- dataRetrieval::whatWQPsites( + siteid = bbox_avail$MonitoringLocationIdentifier + ) %>% + TADA_MakeSpatial(., crs = 4326) # Subset sites to only within shapefile and get IDs clipped_sites_sf <- bbox_sites_sf[aoi_sf, ] clipped_site_ids <- clipped_sites_sf$MonitoringLocationIdentifier - # Check number of sites returned. More than 300 will require a map() approach - if( length(clipped_site_ids) > 300 ) { + record_count <- bbox_avail %>% + dplyr::filter(MonitoringLocationIdentifier %in% clipped_site_ids) %>% + dplyr::pull(resultCount) %>% + sum() + + site_count <- length(clipped_site_ids) + + # Check for either more than 300 sites or more records than max_recs. + # If either is true then we'll approach the pull as a "big data" pull + if( site_count > 300 | record_count > maxrecs) { warning( paste0( - "More than 300 sites are matched by the AOI and query terms. ", + "The number of sites and/or records matched by the AOI and query terms is large, so the download may take some time. ", "If your AOI is a county, state, country, or HUC boundary it would be more efficient to provide a code instead of an sf object." 
) ) - # Split IDs into a list - id_cluster_list <- split(x = clipped_site_ids, - f = ceiling(seq_along(clipped_site_ids) / 300)) - - print("Downloading WQP query results. This may take some time depending upon the query size.") + # Use helper function to download large data volume + results.DR <- suppressMessages( + TADA_BigDataHelper( + record_summary = bbox_avail %>% + dplyr::select(MonitoringLocationIdentifier, resultCount) %>% + dplyr::filter(MonitoringLocationIdentifier %in% clipped_site_ids), + WQPquery = WQPquery, + maxrecs = maxrecs, + maxsites = 300 + ) + ) - # List of query results - results.DR <- purrr::map( - .x = id_cluster_list, - .f = ~suppressMessages( - dataRetrieval::readWQPdata( - siteid = .x, - WQPquery, - dataProfile = "resultPhysChem", - ignore_attributes = TRUE - ) - ) %>% - # To allow row binds - dplyr::mutate(across(everything(), as.character)) - ) %>% - list_rbind() + rm(bbox_avail, bbox_sites_sf) + gc() # Check if any results were returned if ( (nrow(results.DR) > 0 ) == FALSE) { @@ -466,6 +469,7 @@ TADA_DataRetrieval <- function(startDate = "null", "Removing some of your query filters OR broadening your search area may help." ) ) + # Empty TADAprofile.clean <- results.DR } else { @@ -487,13 +491,8 @@ TADA_DataRetrieval <- function(startDate = "null", FullPhysChem = results.DR, Sites = sites.DR, Projects = projects.DR - ) - - # need to specify this or throws error when trying to bind rows. - # Temporary fix for larger issue where data structure for all columns - # should be specified. 
- TADAprofile <- TADAprofile %>% dplyr::mutate( - across(everything(), as.character) + ) %>% dplyr::mutate( + across(tidyselect::everything(), as.character) ) # run TADA_AutoClean function @@ -508,7 +507,7 @@ TADA_DataRetrieval <- function(startDate = "null", return(TADAprofile.clean) - # Less than 300 sites: + # Doesn't meet "big data" threshold: } else { # Retrieve all 3 profiles @@ -523,7 +522,7 @@ TADA_DataRetrieval <- function(startDate = "null", ignore_attributes = TRUE ) - # check if any results were returned + # Check if any results were returned if ((nrow(results.DR) > 0) == FALSE) { paste0( "Returning empty results dataframe: ", @@ -535,28 +534,28 @@ TADA_DataRetrieval <- function(startDate = "null", } else { # Get site metadata - sites.DR <- dataRetrieval::whatWQPsites(WQPquery) + sites.DR <- clipped_sites_sf %>% + as_tibble() %>% + select(-geometry) # Get project metadata - projects.DR <- dataRetrieval::readWQPdata(WQPquery, - ignore_attributes = TRUE, - service = "Project") + projects.DR <- dataRetrieval::readWQPdata( + siteid = clipped_site_ids, + WQPquery, + ignore_attributes = TRUE, + service = "Project" + ) # Join results, sites, projects TADAprofile <- TADA_JoinWQPProfiles( FullPhysChem = results.DR, Sites = sites.DR, Projects = projects.DR + ) %>% dplyr::mutate( + across(tidyselect::everything(), as.character) ) - # need to specify this or throws error when trying to bind rows. - # Temporary fix for larger issue where data structure for all columns - # should be specified. - TADAprofile <- TADAprofile %>% dplyr::mutate( - across(everything(), as.character) - ) - - # run TADA_AutoClean function + # Run TADA_AutoClean function if (applyautoclean == TRUE) { print("Data successfully downloaded. 
Running TADA_AutoClean function.") @@ -572,6 +571,7 @@ TADA_DataRetrieval <- function(startDate = "null", # If no sf object provided: } else { + # Set query parameters WQPquery <- list() @@ -678,37 +678,58 @@ TADA_DataRetrieval <- function(startDate = "null", WQPquery <- c(WQPquery, endDate = endDate) } - # Retrieve all 3 profiles - print("Downloading WQP query results. This may take some time depending upon the query size.") - print(WQPquery) - results.DR <- dataRetrieval::readWQPdata(WQPquery, - dataProfile = "resultPhysChem", - ignore_attributes = TRUE - ) - # check if any results are available - if ((nrow(results.DR) > 0) == FALSE) { - print("Returning empty results dataframe: Your WQP query returned no results (no data available). Try a different query. Removing some of your query filters OR broadening your search area may help.") - TADAprofile.clean <- results.DR - } else { - sites.DR <- dataRetrieval::whatWQPsites(WQPquery) + # Query info on available data + query_avail <- dataRetrieval::whatWQPdata(WQPquery) + + site_count <- length(query_avail$MonitoringLocationIdentifier) + + record_count <- query_avail %>% + dplyr::pull(resultCount) %>% + sum() + + # Check for either more than 300 sites or more records than max_recs. + # If either is true then we'll approach the pull as a "big data" pull + if(site_count > 300 | record_count > maxrecs) { + warning( + "The number of sites and/or records matched by the query terms is large, so the download may take some time." 
+ ) + + # Use helper function to download large data volume + results.DR <- suppressMessages( + TADA_BigDataHelper( + record_summary = query_avail %>% + dplyr::select(MonitoringLocationIdentifier, resultCount), + WQPquery = WQPquery, + maxrecs = maxrecs, + maxsites = 300 + ) + ) + + rm(query_avail) + gc() - projects.DR <- dataRetrieval::readWQPdata(WQPquery, - ignore_attributes = TRUE, - service = "Project" + # Get site metadata + sites.DR <- dataRetrieval::whatWQPsites( + siteid = unique(results.DR$MonitoringLocationIdentifier) ) + # Get project metadata + projects.DR <- dataRetrieval::readWQPdata( + siteid = unique(results.DR$MonitoringLocationIdentifier), + WQPquery, + ignore_attributes = TRUE, + service = "Project" + ) + + # Join results, sites, projects TADAprofile <- TADA_JoinWQPProfiles( FullPhysChem = results.DR, Sites = sites.DR, Projects = projects.DR + ) %>% dplyr::mutate( + across(tidyselect::everything(), as.character) ) - # need to specify this or throws error when trying to bind rows. Temporary fix for larger - # issue where data structure for all columns should be specified. - cols <- names(TADAprofile) - - TADAprofile <- TADAprofile %>% dplyr::mutate_at(cols, as.character) - # run TADA_AutoClean function if (applyautoclean == TRUE) { print("Data successfully downloaded. Running TADA_AutoClean function.") @@ -717,11 +738,52 @@ TADA_DataRetrieval <- function(startDate = "null", } else { TADAprofile.clean <- TADAprofile } + + return(TADAprofile.clean) + + # If not a "big data" pull: + } else { + # Retrieve all 3 profiles + print("Downloading WQP query results. This may take some time depending upon the query size.") + print(WQPquery) + results.DR <- dataRetrieval::readWQPdata(WQPquery, + dataProfile = "resultPhysChem", + ignore_attributes = TRUE + ) + + # check if any results are available + if ((nrow(results.DR) > 0) == FALSE) { + print("Returning empty results dataframe: Your WQP query returned no results (no data available). Try a different query. 
Removing some of your query filters OR broadening your search area may help.") + TADAprofile.clean <- results.DR + } else { + sites.DR <- dataRetrieval::whatWQPsites(WQPquery) + + projects.DR <- dataRetrieval::readWQPdata(WQPquery, + ignore_attributes = TRUE, + service = "Project" + ) + + TADAprofile <- TADA_JoinWQPProfiles( + FullPhysChem = results.DR, + Sites = sites.DR, + Projects = projects.DR + ) %>% dplyr::mutate( + across(tidyselect::everything(), as.character) + ) + + # run TADA_AutoClean function + if (applyautoclean == TRUE) { + print("Data successfully downloaded. Running TADA_AutoClean function.") + + TADAprofile.clean <- TADA_AutoClean(TADAprofile) + } else { + TADAprofile.clean <- TADAprofile + } + } + + return(TADAprofile.clean) } - - return(TADAprofile.clean) } - } From 7d9ac2aa74a87a525ecc42837184a1c601b64aa3 Mon Sep 17 00:00:00 2001 From: Matt Brousil <37380883+mbrousil@users.noreply.github.com> Date: Wed, 20 Nov 2024 12:51:03 -0800 Subject: [PATCH 05/35] Helper function for large queries --- R/DataDiscoveryRetrieval.R | 93 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 93 insertions(+) diff --git a/R/DataDiscoveryRetrieval.R b/R/DataDiscoveryRetrieval.R index f689ae47..1d9923e4 100644 --- a/R/DataDiscoveryRetrieval.R +++ b/R/DataDiscoveryRetrieval.R @@ -865,6 +865,99 @@ TADA_ReadWQPWebServices <- function(webservice) { } +TADA_BigDataHelper <- function(record_summary, WQPquery, maxrecs, maxsites = 300){ + + # Get total number of results per site and separate out sites with >maxrecs results + tot_sites <- record_summary %>% + # dplyr::filter(MonitoringLocationIdentifier %in% clipped_site_ids) %>% + dplyr::group_by(MonitoringLocationIdentifier) %>% + dplyr::summarise(tot_n = sum(resultCount)) %>% + dplyr::filter(tot_n > 0) %>% + dplyr::arrange(tot_n) + + # Sites with less than/equal to maxrecs + smallsites <- tot_sites %>% dplyr::filter(tot_n <= maxrecs) + # Sites with more than maxrecs + bigsites <- tot_sites %>% dplyr::filter(tot_n > 
maxrecs) + + df_small <- data.frame() + df_big <- data.frame() + + # Work with small sites first: + # Build download groups. Total record count limited to value of maxrecs. + # Number of sites per download group limited to 300. + if (dim(smallsites)[1] > 0) { + smallsitesgrp <- smallsites %>% + mutate(group = MESS::cumsumbinning( + x = tot_n, + threshold = maxrecs, + maxgroupsize = 300 + )) + + # Status update to user + print( + paste0("Downloading data from sites with fewer than ", + maxrecs, + " results by grouping them together.") + ) + + # Download the data for each group + for (i in 1:max(smallsitesgrp$group)) { + small_site_chunk <- subset(smallsitesgrp$MonitoringLocationIdentifier, + smallsitesgrp$group == i) + # Query result data + results_small <- dataRetrieval::readWQPdata( + siteid = small_site_chunk, + WQPquery, + dataProfile = "resultPhysChem", + ignore_attributes = TRUE + ) %>% + dplyr::mutate(across(everything(), as.character)) + + # If data is returned, stack with what's already been retrieved + if (dim(results_small)[1] > 0) { + df_small <- dplyr::bind_rows(df_small, results_small) + } + } + + rm(smallsites, smallsitesgrp) + gc() + + # Large sites (>= maxrecs) next: + if (dim(bigsites)[1] > 0) { + print( + paste0("Downloading data from sites with greater than ", + maxrecs, + " results, chunking queries by site.") + ) + + # Unique site IDs + bsitesvec <- unique(bigsites$MonitoringLocationIdentifier) + + # For each site + for (i in 1:length(bsitesvec)) { + # Download each site's data individually + results_big <- dataRetrieval::readWQPdata( + siteid = bsitesvec[i], + WQPquery, + dataProfile = "resultPhysChem", + ignore_attributes = TRUE + ) %>% + dplyr::mutate(across(everything(), as.character)) + + if (dim(results_big)[1] > 0) { + df_big <- dplyr::bind_rows(df_big, results_big) + } + } + } + rm(bigsites) + gc() + } + + df_out <- bind_rows(df_small, df_big) + + return(df_out) +} #' Large WQP data pulls using dataRetrieval #' From 
2b06dbd20209302c26636eb87e6ea6b2950113da Mon Sep 17 00:00:00 2001 From: Matt Brousil <37380883+mbrousil@users.noreply.github.com> Date: Wed, 20 Nov 2024 13:12:23 -0800 Subject: [PATCH 06/35] Document bigdatahelper --- R/DataDiscoveryRetrieval.R | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/R/DataDiscoveryRetrieval.R b/R/DataDiscoveryRetrieval.R index 1d9923e4..56a2e57e 100644 --- a/R/DataDiscoveryRetrieval.R +++ b/R/DataDiscoveryRetrieval.R @@ -864,12 +864,25 @@ TADA_ReadWQPWebServices <- function(webservice) { } } - -TADA_BigDataHelper <- function(record_summary, WQPquery, maxrecs, maxsites = 300){ +#' Assist with large WQP data pulls using dataRetrieval +#' +#' This is a helper function that takes large WQP (waterqualitydata.us) queries +#' and splits them up into multiple, smaller queries. By default it pulls data +#' for up to 300 sites or 250,000 data records at a time and then joins the separate +#' pulls back together to produce a single TADA compatible dataframe as the output. +#' Computer memory may limit the size of data frames that your R console will +#' be able to hold in one session. +#' +#' @param record_summary MonitoringLocationIdentifier and resultCount columns from the output of `dataRetrieval::whatWQPdata` for the WQP query being used. +#' @param WQPquery A named list of query terms to supply dataRetrieval functions. +#' @param maxrecs Maximum number of records to query at once. +#' @param maxsites Maximum number of sites to query at once. 
+#' +#' @return TADA-compatible dataframe +TADA_BigDataHelper <- function(record_summary, WQPquery, maxrecs = 250000, maxsites = 300){ # Get total number of results per site and separate out sites with >maxrecs results tot_sites <- record_summary %>% - # dplyr::filter(MonitoringLocationIdentifier %in% clipped_site_ids) %>% dplyr::group_by(MonitoringLocationIdentifier) %>% dplyr::summarise(tot_n = sum(resultCount)) %>% dplyr::filter(tot_n > 0) %>% From 929dc3740330facf4d7126d11781543ac8e770e8 Mon Sep 17 00:00:00 2001 From: Matt Brousil <37380883+mbrousil@users.noreply.github.com> Date: Thu, 21 Nov 2024 08:42:16 -0800 Subject: [PATCH 07/35] update geospatial funs --- R/GeospatialFunctions.R | 2123 +++++++++++++++++++++++++++++---------- 1 file changed, 1610 insertions(+), 513 deletions(-) diff --git a/R/GeospatialFunctions.R b/R/GeospatialFunctions.R index 8b6bb16b..85f9a9d2 100644 --- a/R/GeospatialFunctions.R +++ b/R/GeospatialFunctions.R @@ -2,7 +2,7 @@ #' #' Transform a Water Quality Portal dataframe into a geospatial sf object. #' -#' Adds one new column to input dataset, 'geometry', which allows for mapping and additional geospatial capabilities. Check out the TADAModule2.Rmd for an example workflow. +#' Adds one new column to input dataframe, 'geometry', which allows for mapping and additional geospatial capabilities. Check out the TADAModule2.Rmd for an example workflow. #' #' @param .data A dataframe created by `TADA_DataRetrieval()`. #' @param crs The coordinate reference system (CRS) you would like the returned point features to be in. The default is CRS 4326 (WGS84). 
@@ -29,13 +29,15 @@ #' TADA_MakeSpatial <- function(.data, crs = 4326) { if (!"LongitudeMeasure" %in% colnames(.data) | - !"LatitudeMeasure" %in% colnames(.data) | - !"HorizontalCoordinateReferenceSystemDatumName" %in% colnames(.data)) { + !"LatitudeMeasure" %in% colnames(.data) | + !"HorizontalCoordinateReferenceSystemDatumName" %in% colnames(.data)) { stop("The dataframe does not contain WQP-style latitude and longitude data (column names `HorizontalCoordinateReferenceSystemDatumName`, `LatitudeMeasure`, and `LongitudeMeasure`.") } else if (!is.null(.data) & inherits(.data, "sf")) { stop("Your data is already a spatial object.") } - + + print("Transforming your data into a spatial object.") + suppressMessages(suppressWarnings({ # Make a reference table for CRS and EPSG codes # List should include all codes in WQX domain (see HorizontalCoordinateReferenceSystemDatum CSV at https://www.epa.gov/waterdata/storage-and-retrieval-and-water-quality-exchange-domain-services-and-downloads) @@ -44,8 +46,9 @@ TADA_MakeSpatial <- function(.data, crs = 4326) { "NAD83", 4269, "WGS84", 4326, "NAD27", 4267, - "UNKWN", crs, # Unknowns and NAs should go to user supplied default - "OTHER", 4326, + "UNKWN", crs, # Unknowns and NAs should go to user supplied crs or default + "Unknown", crs, + "OTHER", crs, "OLDHI", 4135, "AMSMA", 4169, "ASTRO", 4727, @@ -59,13 +62,25 @@ TADA_MakeSpatial <- function(.data, crs = 4326) { "WGS72", 6322, "HARN", 4152 ) - + + # Check the CRS column for NA or "UNKWN" and warn user if any are found + if (any(is.na(.data$HorizontalCoordinateReferenceSystemDatumName)) || + any(.data$HorizontalCoordinateReferenceSystemDatumName %in% c("UNKWN", "Unknown", "OTHER"))) { + print(paste0("Your WQP data frame contains observations without a listed coordinate reference system (CRS). 
For these, we have assigned CRS ", crs, ".")) + } # join our CRS reference table to our original WQP dataframe: sf <- .data %>% tibble::rowid_to_column(var = "index") %>% dplyr::mutate( lat = as.numeric(LatitudeMeasure), - lon = as.numeric(LongitudeMeasure) + lon = as.numeric(LongitudeMeasure), + # If `HorizontalCoordinateReferenceSystemDatumName` is NA... + HorizontalCoordinateReferenceSystemDatumName = ifelse(is.na(HorizontalCoordinateReferenceSystemDatumName), + # ... assign it the same crs as the user-supplied crs: + paste0(epsg_codes %>% dplyr::filter(epsg == as.numeric(crs)) %>% .[1, 1]), + # otherwise, preserve the original crs + HorizontalCoordinateReferenceSystemDatumName + ) ) %>% # Add EPSG codes dplyr::left_join( @@ -89,17 +104,18 @@ TADA_MakeSpatial <- function(.data, crs = 4326) { dplyr::arrange(index) %>% dplyr::select(-c(index, epsg)) })) - + return(sf) } #' fetchATTAINS #' -#' Fetch ATTAINS features (entity submitted points, lines, polygons representing their assessment units; and EPA snapshot of the associated NHDPlus HR catchments that the entity submitted features fall within) within a bounding box produced from a set of TADA spatial features. +#' Fetches ATTAINS features (state- or tribe- or other entity- submitted points, lines, and polygons representing their assessment units; and the EPA snapshot of the associated NHDPlus HR catchments that the state- or tribe- or other entity- submitted features fall within) within a bounding box produced from a set of TADA spatial features. #' #' @param .data A dataframe developed using `TADA_DataRetrieval()` or `TADA_MakeSpatial()`. -#' @return spatial features (ATTAINS_catchments, ATTAINS_points, ATTAINS_lines, and ATTAINS_polygons) that are within the spatial bounding box of water quality observations. +#' @param catchments_only Whether to return just the summarized ATTAINS catchment features, or both the catchments and raw ATTAINS features. TRUE or FALSE. 
+#' @return Spatial features (ATTAINS_catchments, ATTAINS_points, ATTAINS_lines, and ATTAINS_polygons) that are within the spatial bounding box of water quality observations. #' #' @seealso [TADA_MakeSpatial()] #' @seealso [TADA_DataRetrieval()] @@ -114,32 +130,44 @@ TADA_MakeSpatial <- function(.data, crs = 4326) { #' applyautoclean = TRUE #' ) #' -#' nv_attains_features <- fetchATTAINS(tada_data) +#' nv_attains_features <- fetchATTAINS(tada_data, catchments_only = FALSE) #' } -fetchATTAINS <- function(.data) { - if (is.null(.data) | nrow(.data) == 0) { - stop("There is no data in your `data` object to use as a bounding box for selecting ATTAINS features.") - } - +fetchATTAINS <- function(.data, catchments_only = FALSE) { + sf::sf_use_s2(FALSE) + + print("Depending on your data's observation count and its spatial range, the ATTAINS pull may take a while.") + # EPSG we want our ATTAINS data to be in (always 4326 for this function) our_epsg <- 4326 - + # If data is already spatial, just make sure it is in the right CRS # and add an index as the WQP observations' unique identifier... if (!is.null(.data) & inherits(.data, "sf")) { if (sf::st_crs(.data)$epsg != our_epsg) { .data <- .data %>% - sf::st_transform(our_epsg) + sf::st_transform(our_epsg) %>% + dplyr::distinct(geometry, .keep_all = TRUE) } else { - .data <- .data + .data <- .data %>% + dplyr::distinct(geometry, .keep_all = TRUE) } + } else if (!"LongitudeMeasure" %in% colnames(.data) | + !"LatitudeMeasure" %in% colnames(.data) | + !"HorizontalCoordinateReferenceSystemDatumName" %in% colnames(.data)) { + stop("The dataframe does not contain WQP-style latitude and longitude data (column names `HorizontalCoordinateReferenceSystemDatumName`, `LatitudeMeasure`, and `LongitudeMeasure`.") } else { # ... Otherwise transform into a spatial object then do the same thing: .data <- .data %>% + data.table::data.table(.) 
%>% + dplyr::distinct(LongitudeMeasure, LatitudeMeasure, .keep_all = TRUE) %>% # convert dataframe to a spatial object TADA_MakeSpatial(.data = ., crs = our_epsg) } - + + if (is.null(.data) | nrow(.data) == 0) { + stop("There is no data in your `data` object to use as a bounding box for selecting ATTAINS features.") + } + baseurls <- c( # ATTAINS catchments: "https://gispub.epa.gov/arcgis/rest/services/OW/ATTAINS_Assessment/MapServer/3/query?", # ATTAINS points: @@ -149,30 +177,32 @@ fetchATTAINS <- function(.data) { # ATTAINS polygons: "https://gispub.epa.gov/arcgis/rest/services/OW/ATTAINS_Assessment/MapServer/2/query?" ) - - feature_downloader <- function(baseurls) { + + # bounding box of user's WQP data + suppressMessages(suppressWarnings({ + bbox_raw <- .data %>% + sf::st_bbox(.) + bbox <- bbox_raw %>% + # convert bounding box to characters + toString(.) %>% + # encode for use within the API URL + urltools::url_encode(.) + })) + + + feature_downloader <- function(baseurls, sf_bbox) { # starting at feature 1 (i.e., no offset): offset <- 0 # empty list to store all features in all_features <- list() - - # bounding box (with some minor wiggle) of user's WQP data - suppressMessages(suppressWarnings({ - bbox <- .data %>% - sf::st_bbox(.) %>% - # convert bounding box to characters - toString(.) %>% - # encode for use within the API URL - urltools::url_encode(.) - })) - + # The ATTAINS API has a limit of 2000 features that can be pulled in at once. # Therefore, we must split the call into manageable "chunks" using a moving # window of what features to pull in, then munging all the separate API calls # together. - + repeat { - query <- urltools::param_set(baseurls, key = "geometry", value = bbox) %>% + query <- urltools::param_set(baseurls, key = "geometry", value = sf_bbox) %>% urltools::param_set(key = "inSR", value = our_epsg) %>% # Total of 2000 features at a time... 
urltools::param_set(key = "resultRecordCount", value = 2000) %>% @@ -191,7 +221,7 @@ fetchATTAINS <- function(.data) { urltools::param_set(key = "returnDistinctValues", value = "false") %>% urltools::param_set(key = "returnExtentOnly", value = "false") %>% urltools::param_set(key = "featureEncoding", value = "esriDefault") - + # Fetch features within the offset window and append to list: features <- suppressMessages(suppressWarnings({ tryCatch( @@ -203,243 +233,975 @@ fetchATTAINS <- function(.data) { } ) })) - - + # Exit loop if no more features or error occurred if (is.null(features) || nrow(features) == 0) { break } - + all_features <- c(all_features, list(features)) # once done, change offset by 2000 features: offset <- offset + 2000 - + if (offset == 4000) { - print("Your TADA data covers a large spatial range. The ATTAINS pull may take a while.") + + } + } + + all_features <- dplyr::bind_rows(all_features) %>% + # remove duplicate features (precautionary) + dplyr::distinct(.keep_all = TRUE) + } + + # Function used to grab assessment unit "WaterType". + # Sweet spot chunk size wise is 200: + split_vector <- function(vector, chunk_size = 200) { + # Number of chunks needed + num_chunks <- ceiling(length(vector) / chunk_size) + + # Split the vector into chunks + chunks <- split(vector, ceiling(seq_along(vector) / chunk_size)) + + return(chunks) + } + + # If the area of the bbox is massive (about the area of California or larger), AND there + # aren't that many actual monitoring locations (100)... OR the bbox is about the size of New Hampshire, and the observations are under 25... + # ... 
speed up processing by going site-by-site: + if (nrow(.data) <= 100 & as.numeric(sf::st_area(sf::st_as_sfc(bbox_raw))) >= 4e+11 || nrow(.data) <= 25 & as.numeric(sf::st_area(sf::st_as_sfc(bbox_raw))) >= 1e+11) { + catchment_features <- vector("list", length = nrow(.data)) + + for (i in 1:nrow(.data)) { + # bounding box of user's WQP data + suppressMessages(suppressWarnings({ + bbox <- .data[i, ] %>% + sf::st_buffer(0.0000001) %>% + sf::st_bbox(.) %>% + # convert bounding box to characters + toString(.) %>% + # encode for use within the API URL + urltools::url_encode(.) + })) + + catchment_features[[i]] <- feature_downloader(baseurls = baseurls[1], sf_bbox = bbox) + } + + catchment_features <- catchment_features %>% + purrr::keep(~ nrow(.) > 0) %>% + dplyr::bind_rows() + + if (length(catchment_features) == 0 || is.null(catchment_features)) { + print("There are no ATTAINS features associated with your area of interest.") + } else { + ## GRABBING WATER TYPE: + + # Use ATTAINS API to grab, for each assessment unit, its WaterType. + # Query the API in "chunks" so it doesn't break. Sweet spot is ~200: + all_units <- unique(catchment_features$assessmentunitidentifier) + chunks <- split_vector(all_units, chunk_size = 200) + water_types <- vector("list", length = length(chunks)) + + for (i in 1:length(chunks)) { + dat <- httr::GET(paste0("https://attains.epa.gov/attains-public/api/assessmentUnits?assessmentUnitIdentifier=", paste(chunks[[i]], collapse = ","))) %>% + httr::content(., as = "text", encoding = "UTF-8") %>% + jsonlite::fromJSON(.) 
+ + water_types[[i]] <- dat[["items"]] %>% + tidyr::unnest("assessmentUnits") %>% + tidyr::unnest("waterTypes") %>% + dplyr::select( + assessmentUnitIdentifier, + waterTypeCode + ) + } + + water_types <- dplyr::bind_rows(water_types) + + try(catchment_features <- dplyr::left_join(catchment_features, water_types, by = c("assessmentunitidentifier" = "assessmentUnitIdentifier"))) + } + + # If only interested in grabbing catchment data, return just the catchments + if (catchments_only == TRUE) { + return(list("ATTAINS_catchments" = catchment_features)) + } + + # Otherwise, start grabbing the raw ATTAINS features that intersect those + # catchments + points <- vector("list", length = nrow(catchment_features)) + lines <- vector("list", length = nrow(catchment_features)) + polygons <- vector("list", length = nrow(catchment_features)) + + for (i in 1:nrow(catchment_features)) { + # bounding box of catchments + suppressMessages(suppressWarnings({ + bbox <- catchment_features[i, ] %>% + sf::st_bbox(.) %>% + # convert bounding box to characters + toString(.) %>% + # encode for use within the API URL + urltools::url_encode(.) + })) + + points[[i]] <- feature_downloader(baseurls = baseurls[2], sf_bbox = bbox) + lines[[i]] <- feature_downloader(baseurls = baseurls[3], sf_bbox = bbox) + polygons[[i]] <- feature_downloader(baseurls = baseurls[4], sf_bbox = bbox) + } + + points <- points %>% + purrr::keep(~ nrow(.) > 0) %>% + dplyr::bind_rows() + try( + points <- points %>% dplyr::left_join(., water_types, by = c("assessmentunitidentifier" = "assessmentUnitIdentifier")), + silent = TRUE + ) + + lines <- lines %>% + purrr::keep(~ nrow(.) > 0) %>% + dplyr::bind_rows() + try( + lines <- lines %>% dplyr::left_join(., water_types, by = c("assessmentunitidentifier" = "assessmentUnitIdentifier")), + silent = TRUE + ) + + polygons <- polygons %>% + purrr::keep(~ nrow(.) 
> 0) %>% + dplyr::bind_rows() + try( + polygons <- polygons %>% dplyr::left_join(., water_types, by = c("assessmentunitidentifier" = "assessmentUnitIdentifier")), + silent = TRUE + ) + + final_features <- list( + "ATTAINS_catchments" = catchment_features, + "ATTAINS_points" = points, + "ATTAINS_lines" = lines, + "ATTAINS_polygons" = polygons + ) + + return(final_features) + + # Otherwise, just use the bbox in one pull: + } else { + catchment_features <- feature_downloader(baseurls = baseurls[1], sf_bbox = bbox) + + if (length(catchment_features) == 0 || is.null(catchment_features)) { + print("There are no ATTAINS features associated with your area of interest.") + } else { + ## GRABBING WATER TYPE: + + # Use ATTAINS API to grab, for each assessment unit, its WaterType. + # Query the API in "chunks" so it doesn't break: + all_units <- unique(catchment_features$assessmentunitidentifier) + chunks <- split_vector(all_units, chunk_size = 200) + water_types <- vector("list", length = length(chunks)) + + for (i in 1:length(chunks)) { + dat <- httr::GET(paste0("https://attains.epa.gov/attains-public/api/assessmentUnits?assessmentUnitIdentifier=", paste(chunks[[i]], collapse = ","))) %>% + httr::content(., as = "text", encoding = "UTF-8") %>% + jsonlite::fromJSON(.) 
+ + water_types[[i]] <- dat[["items"]] %>% + tidyr::unnest("assessmentUnits") %>% + tidyr::unnest("waterTypes") %>% + dplyr::select( + assessmentUnitIdentifier, + waterTypeCode + ) } + + water_types <- dplyr::bind_rows(water_types) + + try(catchment_features <- dplyr::left_join(catchment_features, water_types, by = c("assessmentunitidentifier" = "assessmentUnitIdentifier")), silent = TRUE) + } + + # If only interested in grabbing catchment data, return just the catchments + if (catchments_only == TRUE) { + return(list("ATTAINS_catchments" = catchment_features)) } - - all_features <- dplyr::bind_rows(all_features) + + # Otherwise, start grabbing the raw ATTAINS features that intersect those + # catchments + + # bounding box of catchments + try(suppressMessages(suppressWarnings({ + bbox <- catchment_features %>% + sf::st_bbox(.) %>% + # convert bounding box to characters + toString(.) %>% + # encode for use within the API URL + urltools::url_encode(.) + })), silent = TRUE) + + # Download associated point, line, and polygon features using catchment bbox + other_features <- baseurls[2:4] %>% + purrr::map(function(baseurl) { + features <- feature_downloader(baseurls = baseurl, sf_bbox = bbox) + + if (!is.null(features) && nrow(features) > 0) { + features <- try(dplyr::left_join(features, water_types, by = c("assessmentunitidentifier" = "assessmentUnitIdentifier")), silent = TRUE) + } + + return(features) + }) + + final_features <- list( + "ATTAINS_catchments" = catchment_features, + "ATTAINS_points" = other_features[[1]], + "ATTAINS_lines" = other_features[[2]], + "ATTAINS_polygons" = other_features[[3]] + ) + + return(final_features) } - - final_features <- baseurls %>% - purrr::map(~ feature_downloader(.)) - - names(final_features) <- c("ATTAINS_catchments", "ATTAINS_points", "ATTAINS_lines", "ATTAINS_polygons") - - return(final_features) } -#' TADA_GetATTAINS +#' fetchNHD #' -#' Link catchment-based ATTAINS assessment unit data (EPA snapshot of NHDPlus HR catchments 
associated with entity submitted assessment unit features - points, lines, and polygons) to Water Quality Portal observations, often imported via `TADA_DataRetrieval()`. This function returns the same raw objects that are mapped in `TADA_ViewATTAINS()`. +#' Fetches NHD features from either the high resolution or medium resolution version of the National Hydrography Dataset (NHD) that intersect catchments containing TADA Water Quality Portal observations. #' -#' Adds the following ATTAINS columns to the input dataframe or list: "ATTAINS.organizationid", "ATTAINS.submissionid", "ATTAINS.hasprotectionplan", "ATTAINS.assessmentunitname", "ATTAINS.nhdplusid", "ATTAINS.tas303d", "ATTAINS.isthreatened", "ATTAINS.state", "ATTAINS.on303dlist", "ATTAINS.organizationname", "ATTAINS.region", "ATTAINS.Shape_Length", "ATTAINS.reportingcycle", "ATTAINS.assmnt_joinkey", "ATTAINS.hastmdl", "ATTAINS.orgtype", "ATTAINS.permid_joinkey", "ATTAINS.catchmentistribal", "ATTAINS.ircategory", "ATTAINS.waterbodyreportlink", "ATTAINS.assessmentunitidentifier", "ATTAINS.overallstatus", "ATTAINS.isassessed", "ATTAINS.isimpaired", "ATTAINS.has4bplan", "ATTAINS.huc12", "ATTAINS.hasalternativeplan", "ATTAINS.visionpriority303d", "ATTAINS.areasqkm", "ATTAINS.catchmentareasqkm", "ATTAINS.catchmentstatecode", "ATTAINS.catchmentresolution", "ATTAINS.Shape_Area". Check out the TADAModule2.Rmd for an example workflow. -#' -#' @param .data A dataframe created by `TADA_DataRetrieval()` or the sf equivalent made by `TADA_MakeSpatial()`. -#' @param return_sf Whether to return the associated ATTAINS_catchments, ATTAINS_lines, ATTAINS_points, and ATTAINS_polygons shapefile objects. TRUE (yes, return) or FALSE (no, do not return). All ATTAINS features are in WGS84 (crs = 4326). +#' @param .data A dataframe created by `TADA_DataRetrieval()` or the geospatial equivalent made by `TADA_MakeSpatial()`. 
#' @param resolution Whether to download the NHDPlus HiRes resolution ("Hi") or medium NHDPlus V2 resolution ("Med") version of the National Hydrography Dataset (NHD). Default is "Hi".
#' @param features Which NHD features to return: "catchments", "flowlines", "waterbodies", or any combination.
#'
#' @return A list containing all selected NHD features associated with the WQP observations of interest. Or, if a single feature type is selected, a single geospatial object instead of a list. Default is "catchments" only.
#'
#' @seealso [TADA_DataRetrieval()]
#' @seealso [TADA_MakeSpatial()]
#'
#' @examples
#' \dontrun{
#' tada_data <- TADA_DataRetrieval(
#'   startDate = "1990-01-01",
#'   endDate = "1990-01-15",
#'   characteristicName = "pH",
#'   statecode = "CO",
#'   applyautoclean = TRUE
#' )
#'
#' nhd_data <- fetchNHD(
#'   .data = tada_data,
#'   resolution = "Hi",
#'   features = c("catchments", "waterbodies", "flowlines")
#' )
#' }
fetchNHD <- function(.data, resolution = "Hi", features = "catchments") {
  suppressMessages(suppressWarnings({
    # If data is already spatial, just make sure it is in the right CRS...
    if (!is.null(.data) && inherits(.data, "sf")) {
      if (sf::st_crs(.data)$epsg != 4326) {
        geospatial_data <- .data %>%
          sf::st_transform(4326)
      } else {
        geospatial_data <- .data
      }
    } else {
      # ...otherwise transform the dataframe into a spatial object first.
      # NOTE(review): this path also adds a `geometry_join` copy of the geometry
      # column, while the already-sf path does not — confirm this asymmetry is
      # intended by downstream joins.
      geospatial_data <- .data %>%
        # convert dataframe to a spatial object
        TADA_MakeSpatial(.data = ., crs = 4326) %>%
        dplyr::mutate(geometry_join = geometry)
    }
  }))

  # Reduce WQP data to unique coordinates so each site is only queried once
  unique_sites <- dplyr::distinct(geospatial_data, geometry)

  # If user wants HighRes NHD...
  if (resolution %in% c("Hi", "hi")) {
    suppressMessages(suppressWarnings({
      # Map server for NHDPlus_HR that is used to download features:
      nhd_plus_hr_url <- "https://hydro.nationalmap.gov/arcgis/rest/services/NHDPlus_HR/MapServer"

      # bounding box of user's WQP data (tiny buffer so a point yields a
      # non-degenerate bbox for the spatial filter)
      wqp_bboxes <- unique_sites %>%
        sf::st_buffer(1e-07) %>%
        dplyr::rowwise() %>%
        dplyr::mutate(bbox = purrr::map(geometry, sf::st_bbox)) %>%
        sf::st_as_sfc()

      # open the nhd_hr - which contains a bunch of layers
      nhd_hr <- arcgislayers::arc_open(nhd_plus_hr_url)

      # list the layers of the nhdhr object
      # arcgislayers::list_items(nhd_hr)

      # select the layer by id from the items list called above (10 is HR catchments)
      nhd_hr_catchments <- arcgislayers::get_layer(nhd_hr, 10)

      # use bboxes of the sites to return their associated catchments;
      # each request is wrapped in try() because a site may have no catchment
      nhd_catchments_stored <- vector("list", length = length(wqp_bboxes))

      for (i in seq_along(wqp_bboxes)) {
        try(
          nhd_catchments_stored[[i]] <- arcgislayers::arc_select(nhd_hr_catchments,
            filter_geom = wqp_bboxes[i],
            crs = sf::st_crs(wqp_bboxes[i])
          ) %>%
            sf::st_make_valid(),
          silent = TRUE
        )
      }

      nhd_catchments_stored <- nhd_catchments_stored %>%
        purrr::keep(~ !is.null(.)) %>%
        dplyr::bind_rows() %>%
        dplyr::distinct()

      # standardize the returned columns (wrapped in try(): the select fails
      # harmlessly when the bind above produced an empty tibble)
      try(
        nhd_catchments_stored <- nhd_catchments_stored %>%
          dplyr::select(nhdplusid,
            catchmentareasqkm = areasqkm
          ) %>%
          dplyr::mutate(
            NHD.nhdplusid = as.character(nhdplusid),
            NHD.resolution = "HR",
            NHD.catchmentareasqkm = as.numeric(catchmentareasqkm)
          ) %>%
          dplyr::select(NHD.nhdplusid, NHD.resolution, NHD.catchmentareasqkm, geometry),
        silent = TRUE
      )
    }))

    # Empty version of the df will be returned if no associated catchments
    # to avoid breaking downstream fxns reliant on catchment info.
    if (nrow(nhd_catchments_stored) == 0 && "catchments" %in% features) {
      print("No NHD HR features associated with your area of interest.")
      nhd_catchments_stored <- tibble::tibble(
        NHD.nhdplusid = character(),
        NHD.resolution = character(),
        NHD.catchmentareasqkm = numeric()
      )
    }

    # Catchments are the AOI used to find flowlines/waterbodies, so without
    # them no other feature type can be retrieved:
    if (nrow(nhd_catchments_stored) == 0 && !"catchments" %in% features) {
      stop("No NHD HR features associated with your area of interest.")
    }

    if (length(features) == 1 && features == "catchments") {
      return(nhd_catchments_stored)
    }

    # Grab flowlines -
    if ("flowlines" %in% features && nrow(nhd_catchments_stored) > 0) {
      suppressMessages(suppressWarnings({
        # use catchments to grab other NHD features
        geospatial_aoi <- nhd_catchments_stored %>%
          sf::st_as_sfc()

        # select the layer by id from the items list (3 is HR flowlines)
        nhd_hr_flowlines <- arcgislayers::get_layer(nhd_hr, 3)

        # use catchments to return associated flowlines
        nhd_flowlines_stored <- vector("list", length = length(geospatial_aoi))

        for (i in seq_along(geospatial_aoi)) {
          try(
            nhd_flowlines_stored[[i]] <- arcgislayers::arc_select(nhd_hr_flowlines,
              filter_geom = geospatial_aoi[i],
              crs = sf::st_crs(geospatial_aoi[i])
            ) %>%
              sf::st_make_valid(),
            silent = TRUE
          )

          # so all returned meta data binds properly, must transform all columns
          # into characters, EXCEPT for the geometry column:
          try(
            geometry_col <- sf::st_geometry(nhd_flowlines_stored[[i]]),
            silent = TRUE
          )

          try(
            nhd_flowlines_stored[[i]] <- nhd_flowlines_stored[[i]] %>%
              dplyr::mutate(dplyr::across(dplyr::where(~ !identical(., geometry_col)), ~ as.character(.))),
            silent = TRUE
          )
        }

        # drop failed requests (NULL slots, or try-error strings as a precaution)
        nhd_flowlines_stored <- nhd_flowlines_stored %>%
          purrr::keep(~ !is.null(.)) %>%
          purrr::keep(~ !is.character(.)) %>%
          dplyr::bind_rows() %>%
          dplyr::distinct()
      }))

      if (length(features) == 1 && features == "flowlines") {
        if (is.null(nhd_flowlines_stored) || nrow(nhd_flowlines_stored) == 0) {
          print("There are no NHD flowlines associated with your area of interest.")
        }

        return(nhd_flowlines_stored)
      }

      if (is.null(nhd_flowlines_stored) || nrow(nhd_flowlines_stored) == 0) {
        print("There are no NHD flowlines associated with your area of interest.")
      }
    }

    # Grab waterbodies -
    if ("waterbodies" %in% features && nrow(nhd_catchments_stored) > 0) {
      suppressMessages(suppressWarnings({
        geospatial_aoi <- nhd_catchments_stored %>%
          sf::st_as_sfc()

        # select the layer by id from the items list called above (9 is HR waterbodies)
        nhd_hr_waterbodies <- arcgislayers::get_layer(nhd_hr, 9)

        # use catchments to return associated waterbodies
        nhd_waterbodies_stored <- vector("list", length = length(geospatial_aoi))

        for (i in seq_along(geospatial_aoi)) {
          try(
            nhd_waterbodies_stored[[i]] <- arcgislayers::arc_select(nhd_hr_waterbodies,
              # where = query,
              filter_geom = geospatial_aoi[i],
              crs = sf::st_crs(geospatial_aoi[i])
            ) %>%
              sf::st_make_valid(),
            silent = TRUE
          )

          # so all returned meta data binds properly, must transform all columns
          # into characters, EXCEPT for the geometry column:
          try(
            geometry_col <- sf::st_geometry(nhd_waterbodies_stored[[i]]),
            silent = TRUE
          )

          try(
            nhd_waterbodies_stored[[i]] <- nhd_waterbodies_stored[[i]] %>%
              dplyr::mutate(dplyr::across(dplyr::where(~ !identical(., geometry_col)), ~ as.character(.))),
            silent = TRUE
          )
        }

        nhd_waterbodies_stored <- nhd_waterbodies_stored %>%
          purrr::keep(~ !is.null(.)) %>%
          purrr::keep(~ !is.character(.)) %>%
          dplyr::bind_rows() %>%
          dplyr::distinct()
      }))

      if (length(features) == 1 && features == "waterbodies") {
        if (is.null(nhd_waterbodies_stored) || nrow(nhd_waterbodies_stored) == 0) {
          print("There are no NHD waterbodies associated with your area of interest.")
        }

        return(nhd_waterbodies_stored)
      }

      if (is.null(nhd_waterbodies_stored) || nrow(nhd_waterbodies_stored) == 0) {
        print("There are no NHD waterbodies associated with your area of interest.")
      }
    }

    # Combinations of features selected, and what they return:

    if (length(features) == 2 && "catchments" %in% features && "flowlines" %in% features) {
      nhd_list <- list(
        "NHD_catchments" = nhd_catchments_stored,
        "NHD_flowlines" = nhd_flowlines_stored
      )

      return(nhd_list)
    } else if (length(features) == 2 && "catchments" %in% features && "waterbodies" %in% features) {
      nhd_list <- list(
        "NHD_catchments" = nhd_catchments_stored,
        "NHD_waterbodies" = nhd_waterbodies_stored
      )

      return(nhd_list)
    } else if (length(features) == 2 && "flowlines" %in% features && "waterbodies" %in% features) {
      nhd_list <- list(
        "NHD_flowlines" = nhd_flowlines_stored,
        "NHD_waterbodies" = nhd_waterbodies_stored
      )

      return(nhd_list)
    } else if (length(features) == 3 && "catchments" %in% features && "flowlines" %in% features && "waterbodies" %in% features) {
      nhd_list <- list(
        "NHD_catchments" = nhd_catchments_stored,
        "NHD_flowlines" = nhd_flowlines_stored,
        "NHD_waterbodies" = nhd_waterbodies_stored
      )

      return(nhd_list)
    } else {
      stop("Please select between 'catchments', 'flowlines', 'waterbodies', or any combination for `features` argument.")
    }

    # If user wants NHDPlus V2...
  } else if (resolution %in% c("Med", "med")) {
    suppressMessages(suppressWarnings({
      nhd_catchments <- vector("list", length = nrow(unique_sites))

      for (i in seq_len(nrow(unique_sites))) {
        # Use {nhdplusTools} to grab associated catchments...
        try(
          nhd_catchments[[i]] <- nhdplusTools::get_nhdplus(AOI = unique_sites[i, ], realization = "catchment") %>%
            sf::st_make_valid() %>%
            dplyr::select(
              comid = featureid,
              catchmentareasqkm = areasqkm
            ) %>%
            dplyr::mutate(
              NHD.comid = as.character(comid),
              NHD.resolution = "nhdplusV2",
              NHD.catchmentareasqkm = as.numeric(catchmentareasqkm)
            ) %>%
            dplyr::select(NHD.comid, NHD.resolution, NHD.catchmentareasqkm, geometry),
          silent = TRUE
        )
      }

      nhd_catchments <- nhd_catchments %>%
        purrr::keep(~ !is.null(.))

      try(
        nhd_catchments <- dplyr::bind_rows(nhd_catchments) %>%
          dplyr::distinct(),
        silent = TRUE
      )

      # if NHD catchments are not in the correct CRS, transform them
      try(
        if (sf::st_crs(nhd_catchments) != sf::st_crs(geospatial_data)) {
          nhd_catchments <- nhd_catchments %>%
            sf::st_transform(sf::st_crs(geospatial_data)$epsg)
        },
        silent = TRUE
      )
    }))

    # Empty version of the df will be returned if no associated catchments
    # to avoid breaking downstream fxns reliant on catchment info.
    if (nrow(nhd_catchments) == 0 && "catchments" %in% features) {
      print("No NHDPlus V2 features associated with your WQP observations.")
      nhd_catchments <- tibble::tibble(
        NHD.comid = character(),
        NHD.resolution = character(),
        NHD.catchmentareasqkm = numeric()
      )
    }

    if (nrow(nhd_catchments) == 0 && !"catchments" %in% features) {
      stop("No NHDPlus V2 features associated with your WQP observations.")
    }

    if (length(features) == 1 && features == "catchments") {
      return(nhd_catchments)
    }

    # Grab flowlines -
    if ("flowlines" %in% features && nrow(nhd_catchments) > 0) {
      suppressMessages(suppressWarnings({
        nhd_flowlines <- vector("list", length = nrow(nhd_catchments))

        # use catchments as the AOI rows to grab other NHD features:
        aoi_rows <- nhd_catchments

        for (i in seq_len(nrow(aoi_rows))) {
          # Use {nhdplusTools} to grab associated flowlines...
          try(
            nhd_flowlines[[i]] <- nhdplusTools::get_nhdplus(AOI = aoi_rows[i, ], realization = "flowline") %>%
              sf::st_make_valid(),
            silent = TRUE
          )

          # so all returned meta data binds properly, must transform all columns
          # into characters, EXCEPT for the geometry column:
          try(
            geometry_col <- sf::st_geometry(nhd_flowlines[[i]]),
            silent = TRUE
          )

          try(
            nhd_flowlines[[i]] <- nhd_flowlines[[i]] %>%
              dplyr::mutate(dplyr::across(dplyr::where(~ !identical(., geometry_col)), ~ as.character(.))),
            silent = TRUE
          )
        }

        nhd_flowlines <- nhd_flowlines %>%
          purrr::keep(~ !is.null(.))

        # FIX: the pipe previously sat OUTSIDE try(), so distinct() was applied
        # to try()'s return value and its result discarded:
        try(
          nhd_flowlines <- dplyr::bind_rows(nhd_flowlines) %>%
            dplyr::distinct(),
          silent = TRUE
        )

        # if NHD flowlines are not in the correct CRS, transform them
        try(
          if (sf::st_crs(nhd_flowlines) != sf::st_crs(geospatial_data)) {
            nhd_flowlines <- nhd_flowlines %>%
              sf::st_transform(sf::st_crs(geospatial_data)$epsg)
          },
          silent = TRUE
        )
      }))

      if (nrow(nhd_flowlines) == 0 && "flowlines" %in% features) {
        print("No NHDPlus V2 flowlines associated with your WQP observations.")
      }

      if (length(features) == 1 && features == "flowlines") {
        return(nhd_flowlines)
      }
    }

    # Grab waterbodies -
    if ("waterbodies" %in% features && nrow(nhd_catchments) > 0) {
      suppressMessages(suppressWarnings({
        nhd_waterbodies <- vector("list", length = nrow(nhd_catchments))

        # use catchments as the AOI rows to grab other NHD features:
        aoi_rows <- nhd_catchments

        for (i in seq_len(nrow(aoi_rows))) {
          # Use {nhdplusTools} to grab associated waterbodies...
          try(
            nhd_waterbodies[[i]] <- nhdplusTools::get_waterbodies(AOI = aoi_rows[i, ]) %>%
              sf::st_make_valid(),
            silent = TRUE
          )

          # so all returned meta data binds properly, must transform all columns
          # into characters, EXCEPT for the geometry column:
          try(
            geometry_col <- sf::st_geometry(nhd_waterbodies[[i]]),
            silent = TRUE
          )

          try(
            nhd_waterbodies[[i]] <- nhd_waterbodies[[i]] %>%
              dplyr::mutate(dplyr::across(dplyr::where(~ !identical(., geometry_col)), ~ as.character(.))),
            silent = TRUE
          )
        }

        nhd_waterbodies <- nhd_waterbodies %>%
          purrr::keep(~ !is.null(.))

        try(
          nhd_waterbodies <- dplyr::bind_rows(nhd_waterbodies) %>%
            dplyr::distinct(),
          silent = TRUE
        )

        # if NHD waterbodies are not in the correct CRS, transform them
        try(
          if (sf::st_crs(nhd_waterbodies) != sf::st_crs(geospatial_data)) {
            nhd_waterbodies <- nhd_waterbodies %>%
              sf::st_transform(sf::st_crs(geospatial_data)$epsg)
          },
          silent = TRUE
        )
      }))

      if (nrow(nhd_waterbodies) == 0 && "waterbodies" %in% features) {
        print("No NHDPlus V2 waterbodies associated with your WQP observations.")
      }

      if (length(features) == 1 && features == "waterbodies") {
        return(nhd_waterbodies)
      }
    }

    # Combinations of features selected, and what they return:

    if (length(features) == 2 && "catchments" %in% features && "flowlines" %in% features) {
      nhd_list <- list(
        "NHD_catchments" = nhd_catchments,
        "NHD_flowlines" = nhd_flowlines
      )

      return(nhd_list)
    } else if (length(features) == 2 && "catchments" %in% features && "waterbodies" %in% features) {
      nhd_list <- list(
        "NHD_catchments" = nhd_catchments,
        "NHD_waterbodies" = nhd_waterbodies
      )

      return(nhd_list)
    } else if (length(features) == 2 && "flowlines" %in% features && "waterbodies" %in% features) {
      nhd_list <- list(
        "NHD_flowlines" = nhd_flowlines,
        "NHD_waterbodies" = nhd_waterbodies
      )

      return(nhd_list)
    } else if (length(features) == 3 && "catchments" %in% features && "flowlines" %in% features && "waterbodies" %in% features) {
      nhd_list <- list(
        "NHD_catchments" = nhd_catchments,
        "NHD_flowlines" = nhd_flowlines,
        "NHD_waterbodies" = nhd_waterbodies
      )

      return(nhd_list)
    } else {
      stop("Please select between 'catchments', 'flowlines', 'waterbodies', or any combination for `features` argument.")
    }
  } else {
    stop('User-supplied resolution unavailable. Please select between "Med" or "Hi".')
  }
}



#' TADA_GetATTAINS
#'
#' Link catchment-based ATTAINS assessment unit data (EPA snapshot of NHDPlus HR catchments associated with entity submitted assessment unit features - points, lines, and polygons) to Water Quality Portal observations, often imported via `TADA_DataRetrieval()`. This function returns the objects that can be mapped in `TADA_ViewATTAINS()`. Check out the
#' TADAModule2.Rmd for an example workflow.
#'
#' Adds one new column to input dataframe, 'index', which identifies rows that are the same observation but are linked to multiple ATTAINS assessment units. It is possible for a single TADA WQP observation to have multiple ATTAINS assessment units linked to it and subsequently more than one row of data.
#'
#' If TADA_MakeSpatial has not yet been run, this function runs it which also adds another new column to the input dataframe, 'geometry', which allows for mapping and additional geospatial capabilities.
#'
#' @param .data A dataframe created by `TADA_DataRetrieval()` or the sf equivalent made by `TADA_MakeSpatial()`.
#' @param fill_catchments Whether the user would like to return NHD catchments for WQP observations not associated with an ATTAINS assessment unit (TRUE or FALSE). When fill_catchments = TRUE, the returned list splits observations into two dataframes: WQP observations with ATTAINS catchment data, and WQP observations without ATTAINS catchment data. Defaults to FALSE.
#' @param resolution If fill_catchments = TRUE, whether to use NHDPlus V2 "Med" catchments or NHDPlus HiRes "Hi" catchments. Default is NHDPlus HiRes ("Hi").
+#' @param return_sf Whether to return the ATTAINS associated catchments, lines, points, and polygon shapefile objects along with the data frame(s). TRUE (yes, return list) or FALSE (no, do not return). All shapefile features are in WGS84 (crs = 4326). If fill_catchments = TRUE and return_sf = TRUE, the function will additionally return the raw catchment features associated with the observations in TADA_without_ATTAINS in a new shapefile called without_ATTAINS_catchments. Defaults to TRUE. +#' +#' @return A modified `TADA_DataRetrieval()` dataframe or list with additional columns associated with the ATTAINS assessment unit data, and, if fill_catchments = TRUE, an additional dataframe of the observations without intersecting ATTAINS features. +#' Moreover, if return_sf = TRUE, this function will additionally return the raw ATTAINS and catchment shapefile features associated with those observations. +#' +#' @seealso [TADA_DataRetrieval()] +#' @seealso [TADA_MakeSpatial()] +#' @seealso [TADA_ViewATTAINS()] +#' +#' @export +#' +#' @examples +#' \dontrun{ +#' tada_data <- TADA_DataRetrieval( +#' startDate = "2018-05-01", +#' endDate = "2018-07-31", +#' characteristicName = "pH", +#' statecode = "IL", +#' applyautoclean = TRUE +#' ) +#' +#' # note: these example ATTAINS data retrieval queries below may take a long time (10+ minutes) to run +#' +#' tada_attains <- TADA_GetATTAINS(tada_data, fill_catchments = FALSE, return_sf = FALSE) +#' +#' tada_attains_sf <- TADA_GetATTAINS(tada_data, fill_catchments = FALSE, return_sf = TRUE) +#' +#' tada_attains_filled <- TADA_GetATTAINS(tada_data, fill_catchments = TRUE, resolution = "Hi", return_sf = FALSE) +#' +#' tada_attains_filled_sf <- TADA_GetATTAINS(tada_data, fill_catchments = TRUE, resolution = "Hi", return_sf = TRUE) +#' } +TADA_GetATTAINS <- function(.data, fill_catchments = FALSE, resolution = "Hi", return_sf = TRUE) { + sf::sf_use_s2(FALSE) + + attains_names <- c( + "ATTAINS.organizationid", "ATTAINS.submissionid", 
"ATTAINS.hasprotectionplan", + "ATTAINS.assessmentunitname", "ATTAINS.nhdplusid", "ATTAINS.tas303d", + "ATTAINS.isthreatened", "ATTAINS.state", "ATTAINS.on303dlist", + "ATTAINS.organizationname", "ATTAINS.region", "ATTAINS.Shape_Length", + "ATTAINS.reportingcycle", "ATTAINS.assmnt_joinkey", "ATTAINS.hastmdl", + "ATTAINS.orgtype", "ATTAINS.permid_joinkey", "ATTAINS.catchmentistribal", + "ATTAINS.ircategory", "ATTAINS.waterbodyreportlink", "ATTAINS.assessmentunitidentifier", + "ATTAINS.overallstatus", "ATTAINS.isassessed", "ATTAINS.isimpaired", + "ATTAINS.has4bplan", "ATTAINS.huc12", "ATTAINS.hasalternativeplan", + "ATTAINS.visionpriority303d", "ATTAINS.areasqkm", "ATTAINS.catchmentareasqkm", + "ATTAINS.catchmentstatecode", "ATTAINS.catchmentresolution", "ATTAINS.waterTypeCode", + "ATTAINS.Shape_Area" + ) + + if (any(attains_names %in% colnames(.data))) { + stop("Your data has already been joined with ATTAINS data.") + } + + if (nrow(.data) == 0) { + print("Your Water Quality Portal dataframe has no observations. 
Returning an empty dataframe with empty ATTAINS features.") + + # if no WQP observations, return a modified `data` with empty ATTAINS-related columns: + + # Add ATTAINS columns with NA values + col_val_list <- stats::setNames( + object = rep( + x = list(NA), + times = length(attains_names) + ), + nm = attains_names + ) + + no_WQP_data <- .data %>% + dplyr::mutate(index = NA) %>% + dplyr::bind_cols(col_val_list) + + # In this case we'll need to return empty ATTAINS objects + if (return_sf == TRUE) { + ATTAINS_catchments <- NULL + ATTAINS_lines <- NULL + ATTAINS_points <- NULL + ATTAINS_polygons <- NULL + + return(list( + "TADA_with_ATTAINS" = no_WQP_data, + "ATTAINS_catchments" = ATTAINS_catchments, + "ATTAINS_points" = ATTAINS_points, + "ATTAINS_lines" = ATTAINS_lines, + "ATTAINS_polygons" = ATTAINS_polygons + )) + # If ATTAINS objects not requested, then just return the dataframe: + } else { + return(no_WQP_data) + } + } + + # If data doesn't already contain ATTAINS data and isn't an empty dataframe: + suppressMessages(suppressWarnings({ + # If data is already spatial, just make sure it is in the right CRS + # and add unique WQP ID for identifying obs with more than one ATTAINS assessment unit + + if (!is.null(.data) & inherits(.data, "sf")) { + if (sf::st_crs(.data)$epsg != 4326) { + TADA_DataRetrieval_data <- .data %>% + sf::st_transform(4326) %>% + tibble::rowid_to_column(var = "index") + } else { + TADA_DataRetrieval_data <- .data %>% + tibble::rowid_to_column(var = "index") + } + } else { + # ... 
Otherwise transform into a spatial object then do the same thing: + TADA_DataRetrieval_data <- .data %>% + # convert dataframe to a spatial object + TADA_MakeSpatial(.data = ., crs = 4326) %>% + # add unique WQP ID for identifying obs with more than one ATTAINS assessment unit + tibble::rowid_to_column(var = "index") + } + })) + + if (return_sf == TRUE) { + # grab all ATTAINS features that intersect our WQP objects: + attains_features <- try(fetchATTAINS(.data = TADA_DataRetrieval_data), silent = TRUE) + } + + if (return_sf == FALSE) { + # grab all ATTAINS features that intersect our WQP objects: + attains_features <- try(fetchATTAINS(.data = TADA_DataRetrieval_data, catchments_only = FALSE), silent = TRUE) + } + + # Tidy up the intersecting catchment objects: + suppressMessages(suppressWarnings({ + nearby_catchments <- NULL + # (Wrapped with "try" because it is possible that no ATTAINS data exists in the bbox.) + try( + nearby_catchments <- attains_features[["ATTAINS_catchments"]] %>% + # remove unnecessary columns: + dplyr::select(-c(OBJECTID, GLOBALID)) %>% + # select only catchments that have WQP observations in them: + .[TADA_DataRetrieval_data, ] %>% + # add prefix "ATTAINS" to ATTAINS data + dplyr::rename_with(~ paste0("ATTAINS.", .), dplyr::everything()) %>% + # get rid of dupes (as a precaution) + dplyr::distinct(.keep_all = TRUE), + silent = TRUE + ) + if (is.null(nearby_catchments) || nrow(nearby_catchments) == 0) { + nearby_catchments <- NULL + } + })) + + # If no ATTAINS data associated with WQP obs... + if (is.null(nearby_catchments)) { + col_val_list <- stats::setNames( + object = rep( + x = list(NA), + times = length(attains_names) + ), + nm = attains_names + ) + + # ...return a modified `.data` with empty ATTAINS-related columns: + no_ATTAINS_data <- .data %>% + dplyr::bind_cols(col_val_list) %>% + tibble::rowid_to_column(var = "index") + + print("There are no ATTAINS catchments associated with these WQP observations. 
Returning an empty data frame for `TADA_with_ATTAINS`.") + + if (fill_catchments == FALSE) { + # If there are no intersecting ATTAINS catchments, fill_catchments = FALSE, and return_sf = TRUE, return empty sf features with the + # empty TADA_with_ATTAINS df. + if (return_sf == TRUE) { + ATTAINS_catchments <- NULL + ATTAINS_lines <- NULL + ATTAINS_points <- NULL + ATTAINS_polygons <- NULL + + return(list( + "TADA_with_ATTAINS" = no_ATTAINS_data, + "ATTAINS_catchments" = ATTAINS_catchments, + "ATTAINS_points" = ATTAINS_points, + "ATTAINS_lines" = ATTAINS_lines, + "ATTAINS_polygons" = ATTAINS_polygons + )) + } else { + # If there are no intersecting ATTAINS catchments, fill_catchments = FALSE, and return_sf = FALSE, just return the + # empty TADA_with_ATTAINS df. + return(no_ATTAINS_data) + } + } else if (fill_catchments == TRUE) { + # "Downloading NHD data to fill in missing ATTAINS features. Depending on the number of observations and their spatial extent, this might take a while... + nhd_catchments <- fetchNHD(.data = TADA_DataRetrieval_data, resolution = resolution) + + TADA_without_ATTAINS <- TADA_DataRetrieval_data %>% + sf::st_join(nhd_catchments, left = TRUE) + + # If there are no intersecting ATTAINS catchments, fill_catchments = TRUE, and return_sf = TRUE, return empty sf features with the + # empty TADA_with_ATTAINS df PLUS the intersecting NHD catchment features of choice and TADA_without_ATTAINS dataframe. 
+ if (return_sf == TRUE) { + ATTAINS_catchments <- NULL + ATTAINS_lines <- NULL + ATTAINS_points <- NULL + ATTAINS_polygons <- NULL + + return(list( + # must remove all obs from TADA_with_ATTAINS, since all exist in TADA_without_ATTAINS + "TADA_with_ATTAINS" = no_ATTAINS_data[0, ], + "TADA_without_ATTAINS" = TADA_without_ATTAINS, + "ATTAINS_catchments" = ATTAINS_catchments, + "ATTAINS_points" = ATTAINS_points, + "ATTAINS_lines" = ATTAINS_lines, + "ATTAINS_polygons" = ATTAINS_polygons, + "without_ATTAINS_catchments" = nhd_catchments + )) + + # If there are no intersecting ATTAINS catchments, fill_catchments = TRUE, and return_sf = FALSE return empty sf features with the + # empty TADA_with_ATTAINS df PLUS just the TADA_without_ATTAINS df (i.e., no shapefiles returned). + } else { + return(list( + "TADA_with_ATTAINS" = no_ATTAINS_data[0, ], + "TADA_without_ATTAINS" = TADA_without_ATTAINS + )) + } + } + } + + # If there IS at least some ATTAINS data, and fill_catchments = FALSE... + if (!is.null(nearby_catchments) & fill_catchments == FALSE) { + suppressMessages(suppressWarnings({ + # ... link WQP features to the ATTAINS catchment feature(s) they land in: + TADA_with_ATTAINS <- TADA_DataRetrieval_data %>% + # (left join = TRUE to preserve all observations (with or without ATTAINS features):) + sf::st_join(., nearby_catchments, left = TRUE) + + # If there are intersecting ATTAINS catchments, fill_catchments = FALSE, and return_sf = FALSE, return just the + # TADA_with_ATTAINS df + if (return_sf == FALSE) { + return(TADA_with_ATTAINS) + } + # ... 
otherwise return_sf = TRUE, and therefore must grab ATTAINS features, too: + + # CATCHMENT FEATURES + # use original catchment pull, but return column names to original + ATTAINS_catchments <- nearby_catchments + colnames(ATTAINS_catchments) <- gsub("ATTAINS.", "", colnames(ATTAINS_catchments)) + # due to the rename, must re-set geometry column: + sf::st_geometry(ATTAINS_catchments) <- "geometry" + + # POINT FEATURES - try to pull point AU data if it exists. Otherwise, move on... + ATTAINS_points <- NULL + try( + ATTAINS_points <- attains_features[["ATTAINS_points"]] %>% + # subset to only ATTAINS point features in the same NHD HR catchments as WQP observations + .[nearby_catchments, ] %>% + # make sure no duplicate features exist + dplyr::distinct(assessmentunitidentifier, .keep_all = TRUE), + silent = TRUE + ) + if (is.null(ATTAINS_points) || nrow(ATTAINS_points) == 0) { + ATTAINS_points <- NULL + } + + # LINE FEATURES - try to pull line AU data if it exists. Otherwise, move on... + ATTAINS_lines <- NULL + try( + ATTAINS_lines <- attains_features[["ATTAINS_lines"]] %>% # subset to only ATTAINS line features in the same NHD HR catchments as WQP observations .[nearby_catchments, ] %>% # make sure no duplicate line features exist dplyr::distinct(assessmentunitidentifier, .keep_all = TRUE), silent = TRUE ) - + if (is.null(ATTAINS_lines) || nrow(ATTAINS_lines) == 0) { + ATTAINS_lines <- NULL + } + # POLYGON FEATURES - try to pull polygon AU data if it exists. Otherwise, move on... 
ATTAINS_polygons <- NULL try( @@ -450,8 +1212,11 @@ TADA_GetATTAINS <- function(.data, return_sf = TRUE) { dplyr::distinct(assessmentunitidentifier, .keep_all = TRUE), silent = TRUE ) + if (is.null(ATTAINS_polygons) || nrow(ATTAINS_polygons) == 0) { + ATTAINS_polygons <- NULL + } })) - + # If there are ATTAINS catchments, fill_catchments = FALSE, and return_sf = TRUE: return(list( "TADA_with_ATTAINS" = TADA_with_ATTAINS, "ATTAINS_catchments" = ATTAINS_catchments, @@ -459,7 +1224,126 @@ TADA_GetATTAINS <- function(.data, return_sf = TRUE) { "ATTAINS_lines" = ATTAINS_lines, "ATTAINS_polygons" = ATTAINS_polygons )) - } + + # If there IS at least some ATTAINS data, and fill_catchments = TRUE... + } else if (!is.null(nearby_catchments) & fill_catchments == TRUE) { + # ... link WQP features to the ATTAINS catchment feature(s) they land in: + TADA_with_ATTAINS <- TADA_DataRetrieval_data %>% + sf::st_join(., nearby_catchments, left = TRUE) + + missing_attains <- dplyr::filter(TADA_with_ATTAINS, is.na(ATTAINS.submissionid)) + + # Splitting up sites with and without ATTAINS, so remove those without ATTAINS: + TADA_with_ATTAINS <- TADA_with_ATTAINS %>% + dplyr::filter(!is.na(ATTAINS.submissionid)) + + # If there are no WQP observations without missing ATTAINS features, return empty df for + # TADA_without_ATTAINS + if (nrow(missing_attains) == 0) { + print('All WQP features intersect an ATTAINS catchment. 
Returning empty dataframe for "TADA_without_ATTAINS".') + + if (resolution %in% c("Med", "med")) { + TADA_without_ATTAINS <- tibble::tibble( + NHD.comid = character(), + NHD.resolution = character(), + NHD.catchmentareasqkm = numeric() + ) + nhd_catchments <- NULL + } else if (resolution %in% c("Hi", "hi")) { + TADA_without_ATTAINS <- tibble::tibble( + NHD.nhdplusid = character(), + NHD.resolution = character(), + NHD.catchmentareasqkm = numeric() + ) + + nhd_catchments <- NULL + } else { + stop('Please select between "Med" or "Hi" for your NHD resolution.') + } + } + + # If there are some observations with no attains features, grab those sites' intersecting NHD catchments: + if (nrow(missing_attains) > 0) { + # Downloading NHD data to fill in missing ATTAINS features. Depending on the number of observations and + # their spatial extent, this can take a while. + nhd_catchments <- fetchNHD( + .data = missing_attains, + resolution = resolution + ) + + TADA_without_ATTAINS <- missing_attains %>% + # left join = TRUE to preserve all observations: + sf::st_join(., nhd_catchments, left = TRUE) + + # if there are intersecting ATTAINS, fill_catchments = TRUE, and if return_sf = FALSE, return just the dfs: + if (return_sf == FALSE) { + return(list( + "TADA_with_ATTAINS" = TADA_with_ATTAINS, + "TADA_without_ATTAINS" = TADA_without_ATTAINS + )) + } + } + + # CATCHMENT FEATURES + # use original catchment pull, but return column names to original + ATTAINS_catchments <- nearby_catchments + colnames(ATTAINS_catchments) <- gsub("ATTAINS.", "", colnames(ATTAINS_catchments)) + # due to the rename, must re-set geometry column: + sf::st_geometry(ATTAINS_catchments) <- "geometry" + + # POINT FEATURES - try to pull point AU data if it exists. Otherwise, move on... 
+ ATTAINS_points <- NULL + try( + ATTAINS_points <- attains_features[["ATTAINS_points"]] %>% + # subset to only ATTAINS point features in the same NHD HR catchments as WQP observations + .[nearby_catchments, ] %>% + # make sure no duplicate features exist + dplyr::distinct(assessmentunitidentifier, .keep_all = TRUE), + silent = TRUE + ) + if (is.null(ATTAINS_points) || nrow(ATTAINS_points) == 0) { + ATTAINS_points <- NULL + } + + # LINE FEATURES - try to pull line AU data if it exists. Otherwise, move on... + ATTAINS_lines <- NULL + try( + ATTAINS_lines <- attains_features[["ATTAINS_lines"]] %>% + # subset to only ATTAINS line features in the same NHD HR catchments as WQP observations + .[nearby_catchments, ] %>% + # make sure no duplicate line features exist + dplyr::distinct(assessmentunitidentifier, .keep_all = TRUE), + silent = TRUE + ) + if (is.null(ATTAINS_lines) || nrow(ATTAINS_lines) == 0) { + ATTAINS_lines <- NULL + } + + # POLYGON FEATURES - try to pull polygon AU data if it exists. Otherwise, move on... + ATTAINS_polygons <- NULL + try( + ATTAINS_polygons <- attains_features[["ATTAINS_polygons"]] %>% + # subset to only ATTAINS polygon features in the same NHD HR catchments as WQP observations + .[nearby_catchments, ] %>% + # make sure no duplicate polygon features exist + dplyr::distinct(assessmentunitidentifier, .keep_all = TRUE), + silent = TRUE + ) + if (is.null(ATTAINS_polygons) || nrow(ATTAINS_polygons) == 0) { + ATTAINS_polygons <- NULL + } + + # if there is ATTAINS catchment data, fill_catchments = TRUE, return_sf = TRUE, return everything! 
+ return(list( + "TADA_with_ATTAINS" = TADA_with_ATTAINS, + "TADA_without_ATTAINS" = TADA_without_ATTAINS, + "ATTAINS_catchments" = ATTAINS_catchments, + "ATTAINS_points" = ATTAINS_points, + "ATTAINS_lines" = ATTAINS_lines, + "ATTAINS_polygons" = ATTAINS_polygons, + "without_ATTAINS_catchments" = nhd_catchments + )) + } # else {stop("Some combination of arguments is impossible.")} } @@ -467,11 +1351,10 @@ TADA_GetATTAINS <- function(.data, return_sf = TRUE) { #' #' Visualizes the data returned from TADA_GetATTAINS if return_sf was set to TRUE. #' -#' This function visualizes the raw ATTAINS features that are linked to the -#' TADA Water Quality Portal observations. For the function to work properly, -#' the input dataframe must be the list produced from `TADA_GetATTAINS()` -#' with `return_sf = TRUE`. The map also displays the Water Quality Portal -#' monitoring locations used to find the ATTAINS features. Check out the +#' This function visualizes the shapefile features generated with TADA_GetATTAINS and the associated +#' TADA Water Quality Portal monitoring locations used to find the ATTAINS features. For the function to work properly, +#' .data must be the list produced from `TADA_GetATTAINS()` +#' with `return_sf = TRUE`. Check out the #' TADAModule2.Rmd for an example workflow. #' #' @param .data A list containing a data frame and ATTAINS shapefile objects created by `TADA_GetATTAINS()` with the return_sf argument set to TRUE. @@ -499,302 +1382,516 @@ TADA_GetATTAINS <- function(.data, return_sf = TRUE) { #' } TADA_ViewATTAINS <- function(.data) { if (!any(c( - "TADA_with_ATTAINS", "ATTAINS_catchments", "ATTAINS_points", + "ATTAINS_catchments", "ATTAINS_points", "ATTAINS_lines", "ATTAINS_polygons" ) %in% names(.data))) { - stop("Your input dataframe was not produced from `TADA_GetATTAINS()` or it was modified. 
Please create your list of ATTAINS features using `TADA_GetATTAINS()` and confirm that return_sf has been set to TRUE.") + stop("Your input dataframe was not produced from `TADA_GetATTAINS(return_sf = TRUE)`, or it was modified. Please create your list of ATTAINS features using `TADA_GetATTAINS()` and confirm that return_sf had been set to TRUE.") } - + ATTAINS_table <- .data[["TADA_with_ATTAINS"]] ATTAINS_catchments <- .data[["ATTAINS_catchments"]] ATTAINS_points <- .data[["ATTAINS_points"]] ATTAINS_lines <- .data[["ATTAINS_lines"]] ATTAINS_polygons <- .data[["ATTAINS_polygons"]] - - if (nrow(ATTAINS_table) == 0) { - stop("Your WQP dataframe has no observations.") - } - - required_columns <- c( - "LongitudeMeasure", "LatitudeMeasure", - "HorizontalCoordinateReferenceSystemDatumName", - "CharacteristicName", "MonitoringLocationIdentifier", - "MonitoringLocationName", "ResultIdentifier", - "ActivityStartDate", "OrganizationIdentifier" - ) - - if (!any(required_columns %in% colnames(ATTAINS_table))) { - stop("Your dataframe does not contain the necessary WQP-style column names.") - } - - suppressMessages(suppressWarnings({ - sf::sf_use_s2(FALSE) - - # if data was spatial, remove for downstream leaflet dev: - try(ATTAINS_table <- ATTAINS_table %>% - sf::st_drop_geometry(), silent = TRUE) - - tada.pal <- TADA_ColorPalette() - - colors <- data.frame( - overallstatus = c("Not Supporting", "Fully Supporting", "Not Assessed"), - col = c(tada.pal[3], tada.pal[4], tada.pal[7]), - dark_col = c(tada.pal[12], tada.pal[6], tada.pal[11]), - priority = c(1, 2, 3) - ) - - # POINT FEATURES - try to pull point AU data if it exists. Otherwise, move on... - try( - points_mapper <- ATTAINS_points %>% - dplyr::left_join(., colors, by = "overallstatus") %>% - dplyr::mutate(type = "Point Feature") %>% - tibble::rowid_to_column(var = "index") %>% - # some point features are actually multipoint features. 
Must extract all coordinates for mapping - # later: - dplyr::right_join(., tibble::as_tibble(sf::st_coordinates(ATTAINS_points)), by = c("index" = "L1")), - silent = TRUE - ) - - # LINE FEATURES - try to pull line AU data if it exists. Otherwise, move on... - try( - lines_mapper <- ATTAINS_lines %>% - dplyr::left_join(., colors, by = "overallstatus") %>% - dplyr::mutate(type = "Line Feature"), - silent = TRUE - ) - - # POLYGON FEATURES - try to pull polygon AU data if it exists. Otherwise, move on... - try( - polygons_mapper <- ATTAINS_polygons %>% - dplyr::left_join(., colors, by = "overallstatus") %>% - dplyr::mutate(type = "Polygon Feature"), - silent = TRUE - ) - - # Develop WQP site stats (e.g. count of observations, parameters, per site) - sumdat <- ATTAINS_table %>% - dplyr::group_by(MonitoringLocationIdentifier, MonitoringLocationName, LatitudeMeasure, LongitudeMeasure) %>% - dplyr::summarize( - Sample_Count = length(unique(ResultIdentifier)), - Visit_Count = length(unique(ActivityStartDate)), - Parameter_Count = length(unique(CharacteristicName)), - Organization_Count = length(unique(OrganizationIdentifier)), - ATTAINS_AUs = as.character(list(unique(ATTAINS.assessmentunitidentifier))) - ) %>% - dplyr::mutate( - ATTAINS_AUs = ifelse(is.na(ATTAINS_AUs), "None", ATTAINS_AUs), - LatitudeMeasure = as.numeric(LatitudeMeasure), - LongitudeMeasure = as.numeric(LongitudeMeasure) - ) - - # Basemap for AOI: - map <- leaflet::leaflet() %>% - leaflet::addProviderTiles("Esri.WorldTopoMap", - group = "World topo", - options = leaflet::providerTileOptions( - updateWhenZooming = FALSE, - updateWhenIdle = TRUE - ) - ) %>% - leaflet::clearShapes() %>% - leaflet::fitBounds( - lng1 = min(sumdat$LongitudeMeasure), - lat1 = min(sumdat$LatitudeMeasure), - lng2 = max(sumdat$LongitudeMeasure), - lat2 = max(sumdat$LatitudeMeasure) - ) %>% - leaflet.extras::addResetMapButton() %>% - leaflet::addLegend( - position = "bottomright", - colors = c(tada.pal[3], tada.pal[4], tada.pal[7], 
"black", NA), - labels = c( - "ATTAINS: Not Supporting", "ATTAINS: Supporting", "ATTAINS: Not Assessed", "Water Quality Observation(s)", - "NHDPlus HR catchments containing water quality observations + ATTAINS feature are represented as clear polygons with black outlines." - ), - opacity = 1, - title = "Legend" - ) - - # Add ATTAINS catchment outlines (if they exist): - try( - map <- map %>% - leaflet::addPolygons( - data = ATTAINS_catchments, - color = "black", - weight = 1, fillOpacity = 0, - popup = paste0("NHDPlus HR Catchment ID: ", ATTAINS_catchments$nhdplusid) - ), - silent = TRUE - ) - - # Add ATTAINS polygon features (if they exist): - try( - map <- map %>% - leaflet::addPolygons( - data = polygons_mapper, - color = ~ polygons_mapper$col, - fill = ~ polygons_mapper$col, - weight = 3, fillOpacity = 1, - popup = paste0( - "Assessment Unit Name: ", polygons_mapper$assessmentunitname, - "
Assessment Unit ID: ", polygons_mapper$assessmentunitidentifier, - "
Status: ", polygons_mapper$overallstatus, - "
Assessment Unit Type: ", polygons_mapper$type, - "
ATTAINS Link" - ) - ), - silent = TRUE - ) - - # Add ATTAINS lines features (if they exist): - try( - map <- map %>% - leaflet::addPolylines( - data = lines_mapper, - color = ~ lines_mapper$col, - weight = 4, fillOpacity = 1, - popup = paste0( - "Assessment Unit Name: ", lines_mapper$assessmentunitname, - "
Assessment Unit ID: ", lines_mapper$assessmentunitidentifier, - "
Status: ", lines_mapper$overallstatus, - "
Assessment Unit Type: ", lines_mapper$type, - "
ATTAINS Link" - ) - ), - silent = TRUE - ) - - # Add ATTAINS point features (if they exist): - try( - map <- map %>% - leaflet::addCircleMarkers( - data = points_mapper, - lng = ~X, lat = ~Y, - color = ~ points_mapper$col, fillColor = ~ points_mapper$col, - fillOpacity = 1, stroke = TRUE, weight = 1.5, radius = 5, - popup = paste0( - "Assessment Unit Name: ", points_mapper$assessmentunitname, - "
Assessment Unit ID: ", points_mapper$assessmentunitidentifier, - "
Status: ", points_mapper$overallstatus, - "
Assessment Unit Type: ", points_mapper$type, - "
ATTAINS Link" - ) - ), - silent = TRUE - ) - - # Add WQP observation features (should always exist): - try( - map <- map %>% - leaflet::addCircleMarkers( - data = sumdat, - lng = ~LongitudeMeasure, lat = ~LatitudeMeasure, - color = "grey", fillColor = "black", - fillOpacity = 0.8, stroke = TRUE, weight = 1.5, radius = 6, - popup = paste0( - "Site ID: ", sumdat$MonitoringLocationIdentifier, - "
Site Name: ", sumdat$MonitoringLocationName, - "
Measurement Count: ", sumdat$Sample_Count, - "
Visit Count: ", sumdat$Visit_Count, - "
Characteristic Count: ", sumdat$Parameter_Count, - "
ATTAINS Assessment Unit(s): ", sumdat$ATTAINS_AUs - ) - ), - silent = TRUE + + # ATTAINS API seems to be missing some AU data that is still preserved in the catchment layer. + # Use catchments for those instances for mapping purposes: + missing_raw_features <- NULL + + try(missing_raw_features <- ATTAINS_catchments %>% + dplyr::filter(!assessmentunitidentifier %in% c( + ATTAINS_points$assessmentunitidentifier, + ATTAINS_lines$assessmentunitidentifier, + ATTAINS_polygons$assessmentunitidentifier + )), silent = TRUE) + + if (!"without_ATTAINS_catchments" %in% names(.data)) { + if (nrow(ATTAINS_table) == 0) { + stop("Your WQP dataframe has no observations.") + } + + required_columns <- c( + "LongitudeMeasure", "LatitudeMeasure", + "HorizontalCoordinateReferenceSystemDatumName", + "CharacteristicName", "MonitoringLocationIdentifier", + "MonitoringLocationName", "ResultIdentifier", + "ActivityStartDate", "OrganizationIdentifier" ) - - if (is.null(ATTAINS_lines) & is.null(ATTAINS_points) & is.null(ATTAINS_polygons)) { - print("No ATTAINS data associated with this Water Quality Portal data.") + + if (!any(required_columns %in% colnames(ATTAINS_table))) { + stop("Your dataframe does not contain the necessary WQP-style column names.") } - - # Return leaflet map of TADA WQ and its associated ATTAINS data - return(map) - })) -} + suppressMessages(suppressWarnings({ + sf::sf_use_s2(FALSE) -#' Access options available for querying tribal spatial data with `TADA_DataRetrieval()`. -#' -#' @description -#' This function provides access to [six layer datasets](https://geopub.epa.gov/arcgis/rest/services/EMEF/Tribal/MapServer) -#' containing spatial data related to tribal lands: "Alaska Native Allotments", -#' "Alaska Native Villages", "American Indian Reservations", "Off-reservation Trust Lands", -#' "Oklahoma Tribal Statistical Areas", and "Virginia Federally Recognized Tribes". 
-#' These datasets are used by `TADA_DataRetrieval()` when retrieving spatial data -#' for tribal lands specified by the user. -#' -#' The purpose of `TADA_TribalOptions()` is to allow the user to review the available -#' data in those datasets and identify the records they would like to query with -#' `TADA_DataRetrieval()`. -#' -#' An interactive map of the six layer datasets is available on ArcGIS Online Map -#' Viewer here: https://www.arcgis.com/apps/mapviewer/index.html?url=https://geopub.epa.gov/arcgis/rest/services/EMEF/Tribal/MapServer&source=sd -#' -#' @param tribal_area_type A character string. Must be one of the six tribal -#' spatial layers: "Alaska Native Allotments", "Alaska Native Villages", -#' "American Indian Reservations", "Off-reservation Trust Lands", -#' "Oklahoma Tribal Statistical Areas", or "Virginia Federally Recognized Tribes". -#' -#' @param return_sf Logical. Should the function return the dataset as an `sf` -#' object (TRUE) or a data frame (FALSE)? Defaults to FALSE. -#' -#' @returns A data frame or `sf` object containing the specified layer from the EPA -#' Map Service. -#' -#' @note -#' Alaska Native Villages and Virginia Federally Recognized Tribes are point -#' geometries in the Map Service, not polygons. At the time of this writing they -#' do not return any data when used for WQP bbox queries. 
-#' -#' @seealso [TADA_DataRetrieval()] -#' -#' @export -#' - -TADA_TribalOptions <- function(tribal_area_type, return_sf = FALSE){ - - # Make a reference table for tribal area type + url matching - map_service_urls <- tibble::tribble( - ~tribal_area, ~url, - "Alaska Native Allotments", "https://geopub.epa.gov/arcgis/rest/services/EMEF/Tribal/MapServer/0", - "Alaska Native Villages", "https://geopub.epa.gov/arcgis/rest/services/EMEF/Tribal/MapServer/1", - "American Indian Reservations", "https://geopub.epa.gov/arcgis/rest/services/EMEF/Tribal/MapServer/2", - "Off-reservation Trust Lands", "https://geopub.epa.gov/arcgis/rest/services/EMEF/Tribal/MapServer/3", - "Oklahoma Tribal Statistical Areas", "https://geopub.epa.gov/arcgis/rest/services/EMEF/Tribal/MapServer/4", - "Virginia Federally Recognized Tribes", "https://geopub.epa.gov/arcgis/rest/services/EMEF/Tribal/MapServer/5" - ) - - # Confirm usable string provided - if( !(tribal_area_type %in% map_service_urls$tribal_area) ){ - stop("tribal_area_type must match one of the six tribal spatial layer names.") - } - - if( tribal_area_type %in% - c("Alaska Native Villages", "Virginia Federally Recognized Tribes") ){ - warning( - paste0( - "Alaska Native Villages and Virginia Federally Recognized Tribes are point geometries in the Map Service, not polygons. ", - "At the time of this writing they do not return any data when used for WQP bbox queries." + # if data was spatial, remove for downstream leaflet dev: + try(ATTAINS_table <- ATTAINS_table %>% + sf::st_drop_geometry(), silent = TRUE) + + tada.pal <- TADA_ColorPalette() + + colors <- data.frame( + overallstatus = c("Not Supporting", "Fully Supporting", "Not Assessed"), + col = c(tada.pal[3], tada.pal[4], tada.pal[7]), + dark_col = c(tada.pal[12], tada.pal[6], tada.pal[11]), + priority = c(1, 2, 3) ) - ) + + # POINT FEATURES - try to pull point AU data if it exists. Otherwise, move on... 
+ try( + points_mapper <- ATTAINS_points %>% + dplyr::left_join(., colors, by = "overallstatus") %>% + dplyr::mutate(type = "Point Feature") %>% + tibble::rowid_to_column(var = "index") %>% + # some point features are actually multipoint features. Must extract all coordinates for mapping + # later: + dplyr::right_join(., tibble::as_tibble(sf::st_coordinates(ATTAINS_points)), by = c("index" = "L1")), + silent = TRUE + ) + + # LINE FEATURES - try to pull line AU data if it exists. Otherwise, move on... + try( + lines_mapper <- ATTAINS_lines %>% + dplyr::left_join(., colors, by = "overallstatus") %>% + dplyr::mutate(type = "Line Feature"), + silent = TRUE + ) + + # POLYGON FEATURES - try to pull polygon AU data if it exists. Otherwise, move on... + try( + polygons_mapper <- ATTAINS_polygons %>% + dplyr::left_join(., colors, by = "overallstatus") %>% + dplyr::mutate(type = "Polygon Feature"), + silent = TRUE + ) + + # CATCHMENT FEATURES - try to pull missing feature AU data if it exists. Otherwise, move on... + try( + missing_raw_mapper <- missing_raw_features %>% + dplyr::left_join(., colors, by = "overallstatus") %>% + dplyr::mutate(type = "Raw Feature Unavailable"), + silent = TRUE + ) + + # Develop WQP site stats (e.g. 
count of observations, parameters, per site) + sumdat <- ATTAINS_table %>% + dplyr::group_by(MonitoringLocationIdentifier, MonitoringLocationName, LatitudeMeasure, LongitudeMeasure) %>% + dplyr::summarize( + Sample_Count = length(unique(ResultIdentifier)), + Visit_Count = length(unique(ActivityStartDate)), + Parameter_Count = length(unique(CharacteristicName)), + Organization_Count = length(unique(OrganizationIdentifier)), + ATTAINS_AUs = as.character(list(unique(ATTAINS.assessmentunitidentifier))) + ) %>% + dplyr::mutate( + ATTAINS_AUs = ifelse(is.na(ATTAINS_AUs), "None", ATTAINS_AUs), + LatitudeMeasure = as.numeric(LatitudeMeasure), + LongitudeMeasure = as.numeric(LongitudeMeasure) + ) + + # Basemap for AOI: + map <- leaflet::leaflet() %>% + leaflet::addProviderTiles("Esri.WorldTopoMap", + group = "World topo", + options = leaflet::providerTileOptions( + updateWhenZooming = FALSE, + updateWhenIdle = TRUE + ) + ) %>% + leaflet::clearShapes() %>% + leaflet::fitBounds( + lng1 = min(sumdat$LongitudeMeasure), + lat1 = min(sumdat$LatitudeMeasure), + lng2 = max(sumdat$LongitudeMeasure), + lat2 = max(sumdat$LatitudeMeasure) + ) %>% + leaflet.extras::addResetMapButton() %>% + leaflet::addLegend( + position = "bottomright", + colors = c(tada.pal[3], tada.pal[4], tada.pal[7], "black", NA), + labels = c( + "ATTAINS: Not Supporting", "ATTAINS: Supporting", "ATTAINS: Not Assessed", "Water Quality Observation(s)", + "NHDPlus HR catchments containing water quality observations + ATTAINS feature are represented as clear polygons with black outlines." 
+ ), + opacity = 1, + title = "Legend" + ) + + # Add ATTAINS catchment outlines (if they exist): + try( + map <- map %>% + leaflet::addPolygons( + data = ATTAINS_catchments, + color = "black", + weight = 1, fillOpacity = 0, + popup = paste0("NHDPlus HR Catchment ID: ", ATTAINS_catchments$nhdplusid) + ), + silent = TRUE + ) + + # Add ATTAINS catchment outlines as AUs: + try( + map <- map %>% + leaflet::addPolygons( + data = missing_raw_mapper, + color = ~ missing_raw_mapper$col, + fill = ~ missing_raw_mapper$col, + weight = 3, fillOpacity = 0.25, + popup = paste0( + "Assessment Unit Name: ", missing_raw_mapper$assessmentunitname, + "
Assessment Unit ID: ", missing_raw_mapper$assessmentunitidentifier, + "
Status: ", missing_raw_mapper$overallstatus, + "
Assessment Unit Type: ", missing_raw_mapper$type, + "
ATTAINS Link", + "
NHDPlus HR Catchment ID: ", missing_raw_mapper$nhdplusid + ) + ), + silent = TRUE + ) + + # Add ATTAINS polygon features (if they exist): + try( + map <- map %>% + leaflet::addPolygons( + data = polygons_mapper, + color = ~ polygons_mapper$col, + fill = ~ polygons_mapper$col, + weight = 3, fillOpacity = 1, + popup = paste0( + "Assessment Unit Name: ", polygons_mapper$assessmentunitname, + "
Assessment Unit ID: ", polygons_mapper$assessmentunitidentifier, + "
Status: ", polygons_mapper$overallstatus, + "
Assessment Unit Type: ", polygons_mapper$type, + "
ATTAINS Link" + ) + ), + silent = TRUE + ) + + # Add ATTAINS lines features (if they exist): + try( + map <- map %>% + leaflet::addPolylines( + data = lines_mapper, + color = ~ lines_mapper$col, + weight = 4, fillOpacity = 1, + popup = paste0( + "Assessment Unit Name: ", lines_mapper$assessmentunitname, + "
Assessment Unit ID: ", lines_mapper$assessmentunitidentifier, + "
Status: ", lines_mapper$overallstatus, + "
Assessment Unit Type: ", lines_mapper$type, + "
ATTAINS Link" + ) + ), + silent = TRUE + ) + + # Add ATTAINS point features (if they exist): + try( + map <- map %>% + leaflet::addCircleMarkers( + data = points_mapper, + lng = ~X, lat = ~Y, + color = ~ points_mapper$col, fillColor = ~ points_mapper$col, + fillOpacity = 1, stroke = TRUE, weight = 1.5, radius = 5, + popup = paste0( + "Assessment Unit Name: ", points_mapper$assessmentunitname, + "
Assessment Unit ID: ", points_mapper$assessmentunitidentifier, + "
Status: ", points_mapper$overallstatus, + "
Assessment Unit Type: ", points_mapper$type, + "
ATTAINS Link" + ) + ), + silent = TRUE + ) + + # Add WQP observation features (should always exist): + try( + map <- map %>% + leaflet::addCircleMarkers( + data = sumdat, + lng = ~LongitudeMeasure, lat = ~LatitudeMeasure, + color = "grey", fillColor = "black", + fillOpacity = 0.8, stroke = TRUE, weight = 1.5, radius = 6, + popup = paste0( + "Site ID: ", sumdat$MonitoringLocationIdentifier, + "
Site Name: ", sumdat$MonitoringLocationName, + "
Measurement Count: ", sumdat$Sample_Count, + "
Visit Count: ", sumdat$Visit_Count, + "
Characteristic Count: ", sumdat$Parameter_Count, + "
ATTAINS Assessment Unit(s): ", sumdat$ATTAINS_AUs + ) + ), + silent = TRUE + ) + + if (is.null(ATTAINS_lines) & is.null(ATTAINS_points) & is.null(ATTAINS_polygons)) { + print("No ATTAINS data associated with this Water Quality Portal data.") + } + + # Return leaflet map of TADA WQ and its associated ATTAINS data + return(map) + })) } - - # Query Map Service - tribal_area_sf <- dplyr::filter(map_service_urls, - tribal_area == tribal_area_type)$url %>% - arcgislayers::arc_open() %>% - # Return sf - arcgislayers::arc_select() %>% - sf::st_make_valid() - - # Convert to df if needed, export - if(return_sf == FALSE){ - return( - as.data.frame(tribal_area_sf) %>% - sf::st_drop_geometry() + + if ("without_ATTAINS_catchments" %in% names(.data)) { + without_ATTAINS_table <- .data[["TADA_without_ATTAINS"]] + + if (nrow(ATTAINS_table) == 0 & nrow(without_ATTAINS_table) == 0) { + stop("Your WQP dataframe has no observations.") + } + + required_columns <- c( + "LongitudeMeasure", "LatitudeMeasure", + "HorizontalCoordinateReferenceSystemDatumName", + "CharacteristicName", "MonitoringLocationIdentifier", + "MonitoringLocationName", "ResultIdentifier", + "ActivityStartDate", "OrganizationIdentifier" ) - } else { - return(tribal_area_sf) + + if (!any(required_columns %in% colnames(ATTAINS_table))) { + stop("Your dataframe does not contain the necessary WQP-style column names.") + } + + without_ATTAINS_catchments <- NULL + try(without_ATTAINS_catchments <- .data[["without_ATTAINS_catchments"]] %>% + dplyr::rename(nhd = 1), silent = TRUE) + + suppressMessages(suppressWarnings({ + sf::sf_use_s2(FALSE) + + # if data was spatial, remove for downstream leaflet dev. 
+ # But first if no data in the ATTAINS table, add in required column names to + # without ATTAINS data: + if (nrow(ATTAINS_table) == 0) { + new_columns <- names(ATTAINS_table)[grep("^ATTAINS\\.", names(ATTAINS_table))] + ATTAINS_table <- without_ATTAINS_table %>% + sf::st_drop_geometry() + + ATTAINS_table[new_columns] <- NA + } else { + ATTAINS_table <- ATTAINS_table %>% + sf::st_drop_geometry() %>% + dplyr::bind_rows(without_ATTAINS_table) + } + + tada.pal <- TADA_ColorPalette() + + colors <- data.frame( + overallstatus = c("Not Supporting", "Fully Supporting", "Not Assessed"), + col = c(tada.pal[3], tada.pal[4], tada.pal[7]), + dark_col = c(tada.pal[12], tada.pal[6], tada.pal[11]), + priority = c(1, 2, 3) + ) + + # POINT FEATURES - try to pull point AU data if it exists. Otherwise, move on... + try( + points_mapper <- ATTAINS_points %>% + dplyr::left_join(., colors, by = "overallstatus") %>% + dplyr::mutate(type = "Point Feature") %>% + tibble::rowid_to_column(var = "index") %>% + # some point features are actually multipoint features. Must extract all coordinates for mapping + # later: + dplyr::right_join(., tibble::as_tibble(sf::st_coordinates(ATTAINS_points)), by = c("index" = "L1")), + silent = TRUE + ) + + # LINE FEATURES - try to pull line AU data if it exists. Otherwise, move on... + try( + lines_mapper <- ATTAINS_lines %>% + dplyr::left_join(., colors, by = "overallstatus") %>% + dplyr::mutate(type = "Line Feature"), + silent = TRUE + ) + + # POLYGON FEATURES - try to pull polygon AU data if it exists. Otherwise, move on... + try( + polygons_mapper <- ATTAINS_polygons %>% + dplyr::left_join(., colors, by = "overallstatus") %>% + dplyr::mutate(type = "Polygon Feature"), + silent = TRUE + ) + + # CATCHMENT FEATURES - try to pull missing feature AU data if it exists. Otherwise, move on... 
+ try( + missing_raw_mapper <- missing_raw_features %>% + dplyr::left_join(., colors, by = "overallstatus") %>% + dplyr::mutate(type = "Raw Feature Unavailable"), + silent = TRUE + ) + + # Develop WQP site stats (e.g. count of observations, parameters, per site) + sumdat <- ATTAINS_table %>% + dplyr::group_by(MonitoringLocationIdentifier, MonitoringLocationName, LatitudeMeasure, LongitudeMeasure) %>% + dplyr::summarize( + Sample_Count = length(unique(ResultIdentifier)), + Visit_Count = length(unique(ActivityStartDate)), + Parameter_Count = length(unique(CharacteristicName)), + Organization_Count = length(unique(OrganizationIdentifier)), + ATTAINS_AUs = as.character(list(unique(ATTAINS.assessmentunitidentifier))) + ) %>% + dplyr::mutate( + ATTAINS_AUs = ifelse(is.na(ATTAINS_AUs), "None", ATTAINS_AUs), + LatitudeMeasure = as.numeric(LatitudeMeasure), + LongitudeMeasure = as.numeric(LongitudeMeasure) + ) + + # Basemap for AOI: + map <- leaflet::leaflet() %>% + leaflet::addProviderTiles("Esri.WorldTopoMap", + group = "World topo", + options = leaflet::providerTileOptions( + updateWhenZooming = FALSE, + updateWhenIdle = TRUE + ) + ) %>% + leaflet::clearShapes() %>% + leaflet::fitBounds( + lng1 = min(sumdat$LongitudeMeasure), + lat1 = min(sumdat$LatitudeMeasure), + lng2 = max(sumdat$LongitudeMeasure), + lat2 = max(sumdat$LatitudeMeasure) + ) %>% + leaflet.extras::addResetMapButton() %>% + leaflet::addLegend( + position = "bottomright", + colors = c(tada.pal[3], tada.pal[4], tada.pal[7], "black", NA, NA), + labels = c( + "ATTAINS: Not Supporting", "ATTAINS: Supporting", "ATTAINS: Not Assessed", "Water Quality Observation(s)", + "NHDPlus HiRes catchments containing a WQP site + ATTAINS feature(s) are represented as clear polygons.", + "Intersecting grey catchments are those without an ATTAINS feature if fill_catchments = TRUE." 
+ ), + opacity = 1, + title = "Legend" + ) + + # Add ATTAINS catchment outlines (if they exist): + try( + map <- map %>% + leaflet::addPolygons( + data = ATTAINS_catchments, + color = "black", + weight = 1, fillOpacity = 0, + popup = paste0("NHDPlus HR Catchment ID: ", ATTAINS_catchments$nhdplusid) + ), + silent = TRUE + ) + + # Add missing catchment outlines (if they exist): + try( + map <- map %>% + leaflet::addPolygons( + data = without_ATTAINS_catchments, + color = "black", fillColor = "grey", + weight = 1, fillOpacity = 0.3, + popup = paste0(without_ATTAINS_catchments$NHD.resolution, " catchment ID: ", without_ATTAINS_catchments$nhd) + ), + silent = TRUE + ) + + # Add ATTAINS catchment outlines as AUs: + try( + map <- map %>% + leaflet::addPolygons( + data = missing_raw_mapper, + color = ~ missing_raw_mapper$col, + fill = ~ missing_raw_mapper$col, + weight = 3, fillOpacity = 0.25, + popup = paste0( + "Assessment Unit Name: ", missing_raw_mapper$assessmentunitname, + "
Assessment Unit ID: ", missing_raw_mapper$assessmentunitidentifier, + "
Status: ", missing_raw_mapper$overallstatus, + "
Assessment Unit Type: ", missing_raw_mapper$type, + "
ATTAINS Link", + "
NHDPlus HR Catchment ID: ", missing_raw_mapper$nhdplusid + ) + ), + silent = TRUE + ) + + # Add ATTAINS polygon features (if they exist): + try( + map <- map %>% + leaflet::addPolygons( + data = polygons_mapper, + color = ~ polygons_mapper$col, + fill = ~ polygons_mapper$col, + weight = 3, fillOpacity = 1, + popup = paste0( + "Assessment Unit Name: ", polygons_mapper$assessmentunitname, + "
Assessment Unit ID: ", polygons_mapper$assessmentunitidentifier, + "
Status: ", polygons_mapper$overallstatus, + "
Assessment Unit Type: ", polygons_mapper$type, + "
ATTAINS Link" + ) + ), + silent = TRUE + ) + + # Add ATTAINS lines features (if they exist): + try( + map <- map %>% + leaflet::addPolylines( + data = lines_mapper, + color = ~ lines_mapper$col, + weight = 4, fillOpacity = 1, + popup = paste0( + "Assessment Unit Name: ", lines_mapper$assessmentunitname, + "
Assessment Unit ID: ", lines_mapper$assessmentunitidentifier, + "
Status: ", lines_mapper$overallstatus, + "
Assessment Unit Type: ", lines_mapper$type, + "
ATTAINS Link" + ) + ), + silent = TRUE + ) + + # Add ATTAINS point features (if they exist): + try( + map <- map %>% + leaflet::addCircleMarkers( + data = points_mapper, + lng = ~X, lat = ~Y, + color = ~ points_mapper$col, fillColor = ~ points_mapper$col, + fillOpacity = 1, stroke = TRUE, weight = 1.5, radius = 5, + popup = paste0( + "Assessment Unit Name: ", points_mapper$assessmentunitname, + "
Assessment Unit ID: ", points_mapper$assessmentunitidentifier, + "
Status: ", points_mapper$overallstatus, + "
Assessment Unit Type: ", points_mapper$type, + "
ATTAINS Link" + ) + ), + silent = TRUE + ) + + # Add WQP observation features (should always exist): + try( + map <- map %>% + leaflet::addCircleMarkers( + data = sumdat, + lng = ~LongitudeMeasure, lat = ~LatitudeMeasure, + color = "grey", fillColor = "black", + fillOpacity = 0.8, stroke = TRUE, weight = 1.5, radius = 6, + popup = paste0( + "Site ID: ", sumdat$MonitoringLocationIdentifier, + "
Site Name: ", sumdat$MonitoringLocationName, + "
Measurement Count: ", sumdat$Sample_Count, + "
Visit Count: ", sumdat$Visit_Count, + "
Characteristic Count: ", sumdat$Parameter_Count, + "
ATTAINS Assessment Unit(s): ", sumdat$ATTAINS_AUs + ) + ), + silent = TRUE + ) + + if (is.null(ATTAINS_lines) & is.null(ATTAINS_points) & is.null(ATTAINS_polygons)) { + print("No ATTAINS data associated with this Water Quality Portal data.") + } + + # Return leaflet map of TADA WQ and its associated ATTAINS data + return(map) + })) } - } From 001c2ebf54ec47a618073e6dbf2570b04eb53343 Mon Sep 17 00:00:00 2001 From: Matt Brousil <37380883+mbrousil@users.noreply.github.com> Date: Thu, 21 Nov 2024 16:29:07 -0800 Subject: [PATCH 08/35] warning -> message & add dplyr:: --- R/DataDiscoveryRetrieval.R | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/R/DataDiscoveryRetrieval.R b/R/DataDiscoveryRetrieval.R index 56a2e57e..b83abac4 100644 --- a/R/DataDiscoveryRetrieval.R +++ b/R/DataDiscoveryRetrieval.R @@ -54,7 +54,7 @@ #' @param organization A string of letters and/or numbers (some additional characters also possible) used to signify an organization with data in the Water Quality Portal. See https://www.waterqualitydata.us/Codes/organization for options. #' @param project A string of letters and/or numbers (some additional characters also possible) used to signify a project with data in the Water Quality Portal. See https://www.waterqualitydata.us/Codes/project for options. #' @param providers Leave blank to include all, or specify "STEWARDS", "STORET" (i.e., WQX), and/or "NWIS". See https://www.waterqualitydata.us/Codes/providers for options. -#' @param applyautoclean Logical, defaults to TRUE. Applies TADA_AutoClean function on the returned data profile. +#' @param applyautoclean Logical, defaults to TRUE. Applies TADA_AutoClean function on the returned data profile. Suggest switching to FALSE for queries that are expected to be large. 
#' #' @return TADA-compatible dataframe #' @@ -196,8 +196,8 @@ TADA_DataRetrieval <- function(startDate = "null", providers = "null", maxrecs = 250000, applyautoclean = TRUE) { - - # Check for incomplete or inconsistent inputs: + + # Check for incomplete or inconsistent inputs: # If both an sf object and tribe information are provided it's unclear what # the priority should be for the query @@ -437,7 +437,7 @@ TADA_DataRetrieval <- function(startDate = "null", # Check for either more than 300 sites or more records than max_recs. # If either is true then we'll approach the pull as a "big data" pull if( site_count > 300 | record_count > maxrecs) { - warning( + message( paste0( "The number of sites and/or records matched by the AOI and query terms is large, so the download may take some time. ", "If your AOI is a county, state, country, or HUC boundary it would be more efficient to provide a code instead of an sf object." @@ -901,7 +901,7 @@ TADA_BigDataHelper <- function(record_summary, WQPquery, maxrecs = 250000, maxsi # Number of sites per download group limited to 300. if (dim(smallsites)[1] > 0) { smallsitesgrp <- smallsites %>% - mutate(group = MESS::cumsumbinning( + dplyr::mutate(group = MESS::cumsumbinning( x = tot_n, threshold = maxrecs, maxgroupsize = 300 @@ -1167,7 +1167,7 @@ TADA_BigDataRetrieval <- function(startDate = "null", rm(df_summary) # if there are still site records when filtered to years of interest.... 
if (dim(sites)[1] > 0) { - + # get total number of results per site and separate out sites with >250000 results From 19d80a2c459c9dd43c6ae9837bedffd140bac755 Mon Sep 17 00:00:00 2001 From: Matt Brousil <37380883+mbrousil@users.noreply.github.com> Date: Fri, 22 Nov 2024 10:02:37 -0800 Subject: [PATCH 09/35] odds and ends --- NAMESPACE | 1 - man/TADA_BigDataHelper.Rd | 28 +++++++++++++++++++++++++ man/TADA_DataRetrieval.Rd | 3 ++- man/TADA_TribalOptions.Rd | 44 --------------------------------------- 4 files changed, 30 insertions(+), 46 deletions(-) create mode 100644 man/TADA_BigDataHelper.Rd delete mode 100644 man/TADA_TribalOptions.Rd diff --git a/NAMESPACE b/NAMESPACE index 0b286c45..9bd8f6cb 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -74,7 +74,6 @@ export(TADA_SimpleCensoredMethods) export(TADA_Stats) export(TADA_SubstituteDeprecatedChars) export(TADA_SummarizeColumn) -export(TADA_TribalOptions) export(TADA_TwoCharacteristicScatterplot) export(TADA_UniqueCharUnitSpeciation) export(TADA_ViewATTAINS) diff --git a/man/TADA_BigDataHelper.Rd b/man/TADA_BigDataHelper.Rd new file mode 100644 index 00000000..7968424c --- /dev/null +++ b/man/TADA_BigDataHelper.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/DataDiscoveryRetrieval.R +\name{TADA_BigDataHelper} +\alias{TADA_BigDataHelper} +\title{Assist with large WQP data pulls using dataRetrieval} +\usage{ +TADA_BigDataHelper(record_summary, WQPquery, maxrecs = 250000, maxsites = 300) +} +\arguments{ +\item{record_summary}{MonitoringLocationIdentifier and resultCount columns from the output of \code{dataRetrieval::whatWQPdata} for the WQP query being used.} + +\item{WQPquery}{A named list of query terms to supply dataRetrieval functions.} + +\item{maxrecs}{Maximum number of records to query at once.} + +\item{maxsites}{Maximum number of sites to query at once.} +} +\value{ +TADA-compatible dataframe +} +\description{ +This is a helper function that takes large WQP 
(waterqualitydata.us) queries +and splits them up into multiple, smaller queries. By default it pulls data +for up to 300 sites or 250,000 data records at a time and then joins the separate +pulls back together to produce a single TADA compatible dataframe as the output. +Computer memory may limit the size of data frames that your R console will +be able to hold in one session. +} diff --git a/man/TADA_DataRetrieval.Rd b/man/TADA_DataRetrieval.Rd index e49f1e03..24b30c87 100644 --- a/man/TADA_DataRetrieval.Rd +++ b/man/TADA_DataRetrieval.Rd @@ -22,6 +22,7 @@ TADA_DataRetrieval( organization = "null", project = "null", providers = "null", + maxrecs = 250000, applyautoclean = TRUE ) } @@ -60,7 +61,7 @@ TADA_DataRetrieval( \item{providers}{Leave blank to include all, or specify "STEWARDS", "STORET" (i.e., WQX), and/or "NWIS". See https://www.waterqualitydata.us/Codes/providers for options.} -\item{applyautoclean}{Logical, defaults to TRUE. Applies TADA_AutoClean function on the returned data profile.} +\item{applyautoclean}{Logical, defaults to TRUE. Applies TADA_AutoClean function on the returned data profile. Suggest switching to FALSE for queries that are expected to be large.} } \value{ TADA-compatible dataframe diff --git a/man/TADA_TribalOptions.Rd b/man/TADA_TribalOptions.Rd deleted file mode 100644 index 05396d87..00000000 --- a/man/TADA_TribalOptions.Rd +++ /dev/null @@ -1,44 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/GeospatialFunctions.R -\name{TADA_TribalOptions} -\alias{TADA_TribalOptions} -\title{Access options available for querying tribal spatial data with \code{TADA_DataRetrieval()}.} -\usage{ -TADA_TribalOptions(tribal_area_type, return_sf = FALSE) -} -\arguments{ -\item{tribal_area_type}{A character string. 
Must be one of the six tribal -spatial layers: "Alaska Native Allotments", "Alaska Native Villages", -"American Indian Reservations", "Off-reservation Trust Lands", -"Oklahoma Tribal Statistical Areas", or "Virginia Federally Recognized Tribes".} - -\item{return_sf}{Logical. Should the function return the dataset as an \code{sf} -object (TRUE) or a data frame (FALSE)? Defaults to FALSE.} -} -\value{ -A data frame or \code{sf} object containing the specified layer from the EPA -Map Service. -} -\description{ -This function provides access to \href{https://geopub.epa.gov/arcgis/rest/services/EMEF/Tribal/MapServer}{six layer datasets} -containing spatial data related to tribal lands: "Alaska Native Allotments", -"Alaska Native Villages", "American Indian Reservations", "Off-reservation Trust Lands", -"Oklahoma Tribal Statistical Areas", and "Virginia Federally Recognized Tribes". -These datasets are used by \code{TADA_DataRetrieval()} when retrieving spatial data -for tribal lands specified by the user. - -The purpose of \code{TADA_TribalOptions()} is to allow the user to review the available -data in those datasets and identify the records they would like to query with -\code{TADA_DataRetrieval()}. - -An interactive map of the six layer datasets is available on ArcGIS Online Map -Viewer here: https://www.arcgis.com/apps/mapviewer/index.html?url=https://geopub.epa.gov/arcgis/rest/services/EMEF/Tribal/MapServer&source=sd -} -\note{ -Alaska Native Villages and Virginia Federally Recognized Tribes are point -geometries in the Map Service, not polygons. At the time of this writing they -do not return any data when used for WQP bbox queries. 
-} -\seealso{ -\code{\link[=TADA_DataRetrieval]{TADA_DataRetrieval()}} -} From a71a49594189c2f1e9ce4aa77bfe2785abef57bb Mon Sep 17 00:00:00 2001 From: Matt Brousil <37380883+mbrousil@users.noreply.github.com> Date: Fri, 3 Jan 2025 16:41:27 -0800 Subject: [PATCH 10/35] Add user prompts to data retrieval --- R/DataDiscoveryRetrieval.R | 61 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/R/DataDiscoveryRetrieval.R b/R/DataDiscoveryRetrieval.R index b83abac4..84c3b45c 100644 --- a/R/DataDiscoveryRetrieval.R +++ b/R/DataDiscoveryRetrieval.R @@ -54,6 +54,8 @@ #' @param organization A string of letters and/or numbers (some additional characters also possible) used to signify an organization with data in the Water Quality Portal. See https://www.waterqualitydata.us/Codes/organization for options. #' @param project A string of letters and/or numbers (some additional characters also possible) used to signify a project with data in the Water Quality Portal. See https://www.waterqualitydata.us/Codes/project for options. #' @param providers Leave blank to include all, or specify "STEWARDS", "STORET" (i.e., WQX), and/or "NWIS". See https://www.waterqualitydata.us/Codes/providers for options. +#' @param maxrecs Maximum number of records to query at once. +#' @param ask A logical value indicating whether the user should be asked for approval before downloads begin. #' @param applyautoclean Logical, defaults to TRUE. Applies TADA_AutoClean function on the returned data profile. Suggest switching to FALSE for queries that are expected to be large. #' #' @return TADA-compatible dataframe @@ -195,6 +197,7 @@ TADA_DataRetrieval <- function(startDate = "null", project = "null", providers = "null", maxrecs = 250000, + ask = TRUE, applyautoclean = TRUE) { # Check for incomplete or inconsistent inputs: @@ -432,6 +435,19 @@ TADA_DataRetrieval <- function(startDate = "null", dplyr::pull(resultCount) %>% sum() + # Should we proceed with downloads? 
If ask == TRUE then ask the user. + if(ask == TRUE){ + user_decision <- ask_user(n_records = record_count) + + # Act on input + if(user_decision == "yes") { + print("Proceeding with download.") + } else { + stop("Cancelled by user.", call. = FALSE) + } + } + + # Continue now with site count site_count <- length(clipped_site_ids) # Check for either more than 300 sites or more records than max_recs. @@ -687,6 +703,18 @@ TADA_DataRetrieval <- function(startDate = "null", dplyr::pull(resultCount) %>% sum() + # Should we proceed with downloads? If ask == TRUE then ask the user. + if(ask == TRUE){ + user_decision <- ask_user(n_records = record_count) + + # Act on input + if(user_decision == "yes") { + print("Proceeding with download.") + } else { + stop("Cancelled by user.", call. = FALSE) + } + } + # Check for either more than 300 sites or more records than max_recs. # If either is true then we'll approach the pull as a "big data" pull if(site_count > 300 | record_count > maxrecs) { @@ -1352,6 +1380,39 @@ TADA_JoinWQPProfiles <- function(FullPhysChem = "null", return(join2) } +#' Ask user to approve WQP downloads +#' +#' Once record counts have been retrieved from the Water Quality Portal (WQP) for +#' a query, this function is used to prompt the user to decide (i.e., "yes"/"no") +#' whether the download should proceed. The user is also reminded of the limits of +#' Microsoft Excel for row counts as a comparison. +#' +#' @param n_records A numeric value indicating the number of records that will be downloaded from the WQP if the user decides to proceed. +ask_user <- function(n_records){ + + # Text to show user + user_prompt <- cat( + "Your WQP query will return ", + n_records, + " records.\nFor reference, Microsoft Excel will only display ~one million.\n", + "Would you like to continue with the download? 
[yes/no] ", + sep = "" + ) + + # Ask user if they want to continue & check for valid response + while(TRUE){ + user_input <- readline(prompt = user_prompt) + # Convert response to lower and no whitespace + user_input <- tolower(trimws(user_input)) + if (user_input == "yes" || user_input == "no") { + return(user_input) + } else { + cat("Invalid input. Please enter 'yes' or 'no'.\n") + } + } + +} + # function for chunking by records make_groups <- function(x, maxrecs) { From 0d55e7452a3884b1835f93332143f2da0d0e36b6 Mon Sep 17 00:00:00 2001 From: Matt Brousil <37380883+mbrousil@users.noreply.github.com> Date: Mon, 6 Jan 2025 10:58:22 -0800 Subject: [PATCH 11/35] Add progress bar & change warnings to messages --- R/DataDiscoveryRetrieval.R | 96 +++++++++++++++++++++++--------------- 1 file changed, 58 insertions(+), 38 deletions(-) diff --git a/R/DataDiscoveryRetrieval.R b/R/DataDiscoveryRetrieval.R index 84c3b45c..52c6f789 100644 --- a/R/DataDiscoveryRetrieval.R +++ b/R/DataDiscoveryRetrieval.R @@ -409,9 +409,11 @@ TADA_DataRetrieval <- function(startDate = "null", input_bbox <- sf::st_bbox(aoi_sf) # Query info on available data within the bbox - bbox_avail <- dataRetrieval::whatWQPdata( - WQPquery, - bBox = c(input_bbox$xmin, input_bbox$ymin, input_bbox$xmax, input_bbox$ymax) + bbox_avail <- suppressMessages( + dataRetrieval::whatWQPdata( + WQPquery, + bBox = c(input_bbox$xmin, input_bbox$ymin, input_bbox$xmax, input_bbox$ymax) + ) ) # Check if any sites are within the aoi @@ -420,8 +422,10 @@ TADA_DataRetrieval <- function(startDate = "null", } # Reformat returned info as sf - bbox_sites_sf <- dataRetrieval::whatWQPsites( - siteid = bbox_avail$MonitoringLocationIdentifier + bbox_sites_sf <- suppressMessages( + dataRetrieval::whatWQPsites( + siteid = bbox_avail$MonitoringLocationIdentifier + ) ) %>% TADA_MakeSpatial(., crs = 4326) @@ -461,7 +465,7 @@ TADA_DataRetrieval <- function(startDate = "null", ) # Use helper function to download large data volume - results.DR 
<- suppressMessages( + results.DR <- withCallingHandlers( TADA_BigDataHelper( record_summary = bbox_avail %>% dplyr::select(MonitoringLocationIdentifier, resultCount) %>% @@ -469,9 +473,11 @@ TADA_DataRetrieval <- function(startDate = "null", WQPquery = WQPquery, maxrecs = maxrecs, maxsites = 300 - ) + ), + message = function(m) message(m$message) ) + rm(bbox_avail, bbox_sites_sf) gc() @@ -495,11 +501,13 @@ TADA_DataRetrieval <- function(startDate = "null", select(-geometry) # Get project metadata - projects.DR <- dataRetrieval::readWQPdata( - siteid = clipped_site_ids, - WQPquery, - ignore_attributes = TRUE, - service = "Project" + projects.DR <- suppressMessages( + dataRetrieval::readWQPdata( + siteid = clipped_site_ids, + WQPquery, + ignore_attributes = TRUE, + service = "Project" + ) ) # Join results, sites, projects @@ -531,11 +539,13 @@ TADA_DataRetrieval <- function(startDate = "null", print(WQPquery) # Get results - results.DR <- dataRetrieval::readWQPdata( - siteid = clipped_site_ids, - WQPquery, - dataProfile = "resultPhysChem", - ignore_attributes = TRUE + results.DR <- suppressMessages( + dataRetrieval::readWQPdata( + siteid = clipped_site_ids, + WQPquery, + dataProfile = "resultPhysChem", + ignore_attributes = TRUE + ) ) # Check if any results were returned @@ -555,11 +565,13 @@ TADA_DataRetrieval <- function(startDate = "null", select(-geometry) # Get project metadata - projects.DR <- dataRetrieval::readWQPdata( - siteid = clipped_site_ids, - WQPquery, - ignore_attributes = TRUE, - service = "Project" + projects.DR <- suppressMessages( + dataRetrieval::readWQPdata( + siteid = clipped_site_ids, + WQPquery, + ignore_attributes = TRUE, + service = "Project" + ) ) # Join results, sites, projects @@ -695,7 +707,7 @@ TADA_DataRetrieval <- function(startDate = "null", } # Query info on available data - query_avail <- dataRetrieval::whatWQPdata(WQPquery) + query_avail <- supressMessages(dataRetrieval::whatWQPdata(WQPquery)) site_count <- 
length(query_avail$MonitoringLocationIdentifier) @@ -718,7 +730,7 @@ TADA_DataRetrieval <- function(startDate = "null", # Check for either more than 300 sites or more records than max_recs. # If either is true then we'll approach the pull as a "big data" pull if(site_count > 300 | record_count > maxrecs) { - warning( + message( "The number of sites and/or records matched by the query terms is large, so the download may take some time." ) @@ -737,16 +749,20 @@ TADA_DataRetrieval <- function(startDate = "null", gc() # Get site metadata - sites.DR <- dataRetrieval::whatWQPsites( - siteid = unique(results.DR$MonitoringLocationIdentifier) + sites.DR <- suppressMessages( + dataRetrieval::whatWQPsites( + siteid = unique(results.DR$MonitoringLocationIdentifier) + ) ) # Get project metadata - projects.DR <- dataRetrieval::readWQPdata( - siteid = unique(results.DR$MonitoringLocationIdentifier), - WQPquery, - ignore_attributes = TRUE, - service = "Project" + projects.DR <- suppressMessages( + dataRetrieval::readWQPdata( + siteid = unique(results.DR$MonitoringLocationIdentifier), + WQPquery, + ignore_attributes = TRUE, + service = "Project" + ) ) # Join results, sites, projects @@ -774,9 +790,11 @@ TADA_DataRetrieval <- function(startDate = "null", # Retrieve all 3 profiles print("Downloading WQP query results. This may take some time depending upon the query size.") print(WQPquery) - results.DR <- dataRetrieval::readWQPdata(WQPquery, - dataProfile = "resultPhysChem", - ignore_attributes = TRUE + results.DR <- suppressMessages( + dataRetrieval::readWQPdata(WQPquery, + dataProfile = "resultPhysChem", + ignore_attributes = TRUE + ) ) # check if any results are available @@ -784,11 +802,13 @@ TADA_DataRetrieval <- function(startDate = "null", print("Returning empty results dataframe: Your WQP query returned no results (no data available). Try a different query. 
Removing some of your query filters OR broadening your search area may help.") TADAprofile.clean <- results.DR } else { - sites.DR <- dataRetrieval::whatWQPsites(WQPquery) + sites.DR <- suppressMessages(dataRetrieval::whatWQPsites(WQPquery)) - projects.DR <- dataRetrieval::readWQPdata(WQPquery, - ignore_attributes = TRUE, - service = "Project" + projects.DR <- suppressMessages( + dataRetrieval::readWQPdata(WQPquery, + ignore_attributes = TRUE, + service = "Project" + ) ) TADAprofile <- TADA_JoinWQPProfiles( From 27038fb5b7b9553299ac2343bcb2f789d1731bbb Mon Sep 17 00:00:00 2001 From: Matt Brousil <37380883+mbrousil@users.noreply.github.com> Date: Thu, 9 Jan 2025 09:35:36 -0800 Subject: [PATCH 12/35] Include progress bar internals --- R/DataDiscoveryRetrieval.R | 69 ++++++++++++++++++++++++-------------- 1 file changed, 43 insertions(+), 26 deletions(-) diff --git a/R/DataDiscoveryRetrieval.R b/R/DataDiscoveryRetrieval.R index 52c6f789..cd09ccad 100644 --- a/R/DataDiscoveryRetrieval.R +++ b/R/DataDiscoveryRetrieval.R @@ -967,11 +967,13 @@ TADA_BigDataHelper <- function(record_summary, WQPquery, maxrecs = 250000, maxsi small_site_chunk <- subset(smallsitesgrp$MonitoringLocationIdentifier, smallsitesgrp$group == i) # Query result data - results_small <- dataRetrieval::readWQPdata( - siteid = small_site_chunk, - WQPquery, - dataProfile = "resultPhysChem", - ignore_attributes = TRUE + results_small <- suppressMessages( + dataRetrieval::readWQPdata( + siteid = small_site_chunk, + WQPquery, + dataProfile = "resultPhysChem", + ignore_attributes = TRUE + ) ) %>% dplyr::mutate(across(everything(), as.character)) @@ -979,42 +981,57 @@ TADA_BigDataHelper <- function(record_summary, WQPquery, maxrecs = 250000, maxsi if (dim(results_small)[1] > 0) { df_small <- dplyr::bind_rows(df_small, results_small) } + + # Update progress + setTxtProgressBar(pb = small_prog_bar, value = nrow(df_small)) } + # Close progress bar when complete + close(small_prog_bar) rm(smallsites, 
smallsitesgrp) gc() + } + + # Large sites (>= maxrecs) next: + if (dim(bigsites)[1] > 0) { + print( + paste0("Downloading data from sites with greater than ", + maxrecs, + " results, chunking queries by site.") + ) - # Large sites (>= maxrecs) next: - if (dim(bigsites)[1] > 0) { - print( - paste0("Downloading data from sites with greater than ", - maxrecs, - " results, chunking queries by site.") - ) - - # Unique site IDs - bsitesvec <- unique(bigsites$MonitoringLocationIdentifier) - - # For each site - for (i in 1:length(bsitesvec)) { - # Download each site's data individually - results_big <- dataRetrieval::readWQPdata( + big_prog_bar <- txtProgressBar(min = 0, max = sum(bigsites$tot_n), style = 3) + + # Unique site IDs + bsitesvec <- unique(bigsites$MonitoringLocationIdentifier) + + # For each site + for (i in 1:length(bsitesvec)) { + # Download each site's data individually + results_big <- suppressMessages( + dataRetrieval::readWQPdata( siteid = bsitesvec[i], WQPquery, dataProfile = "resultPhysChem", ignore_attributes = TRUE - ) %>% - dplyr::mutate(across(everything(), as.character)) - - if (dim(results_big)[1] > 0) { - df_big <- dplyr::bind_rows(df_big, results_big) - } + ) + )%>% + dplyr::mutate(across(everything(), as.character)) + + if (dim(results_big)[1] > 0) { + df_big <- dplyr::bind_rows(df_big, results_big) } + # Update progress + setTxtProgressBar(pb = big_prog_bar, value = nrow(df_big)) } + # Close progress bar when complete + close(big_prog_bar) + rm(bigsites) gc() } + df_out <- bind_rows(df_small, df_big) return(df_out) From 02de221d3ac253cb286d50b0fe46d8ed9cec20c3 Mon Sep 17 00:00:00 2001 From: Matt Brousil <37380883+mbrousil@users.noreply.github.com> Date: Thu, 9 Jan 2025 09:36:17 -0800 Subject: [PATCH 13/35] Update DataDiscoveryRetrieval.R --- R/DataDiscoveryRetrieval.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/R/DataDiscoveryRetrieval.R b/R/DataDiscoveryRetrieval.R index cd09ccad..03721dc0 100644 --- a/R/DataDiscoveryRetrieval.R +++ 
b/R/DataDiscoveryRetrieval.R @@ -962,6 +962,8 @@ TADA_BigDataHelper <- function(record_summary, WQPquery, maxrecs = 250000, maxsi " results by grouping them together.") ) + small_prog_bar <- txtProgressBar(min = 0, max = sum(smallsites$tot_n), style = 3) + # Download the data for each group for (i in 1:max(smallsitesgrp$group)) { small_site_chunk <- subset(smallsitesgrp$MonitoringLocationIdentifier, From 6e16a9bf556e67e5059cc7590cdac0e053b651a7 Mon Sep 17 00:00:00 2001 From: Matt Brousil <37380883+mbrousil@users.noreply.github.com> Date: Fri, 10 Jan 2025 10:53:13 -0800 Subject: [PATCH 14/35] Apply suggestions from code review Co-authored-by: Katie/Ryn Willi (she/her) <49761053+kathryn-willi@users.noreply.github.com> --- R/DataDiscoveryRetrieval.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/DataDiscoveryRetrieval.R b/R/DataDiscoveryRetrieval.R index 03721dc0..80ab5bf9 100644 --- a/R/DataDiscoveryRetrieval.R +++ b/R/DataDiscoveryRetrieval.R @@ -707,7 +707,7 @@ TADA_DataRetrieval <- function(startDate = "null", } # Query info on available data - query_avail <- supressMessages(dataRetrieval::whatWQPdata(WQPquery)) + query_avail <- suppressMessages(dataRetrieval::whatWQPdata(WQPquery)) site_count <- length(query_avail$MonitoringLocationIdentifier) @@ -1034,7 +1034,7 @@ TADA_BigDataHelper <- function(record_summary, WQPquery, maxrecs = 250000, maxsi } - df_out <- bind_rows(df_small, df_big) + df_out <- dplyr::bind_rows(df_small, df_big) return(df_out) } From 0c42ea505b79847e63f3be2beca63a83e83aafcc Mon Sep 17 00:00:00 2001 From: Matt Brousil <37380883+mbrousil@users.noreply.github.com> Date: Fri, 10 Jan 2025 11:27:30 -0800 Subject: [PATCH 15/35] Apply suggestions from code review --- R/DataDiscoveryRetrieval.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/DataDiscoveryRetrieval.R b/R/DataDiscoveryRetrieval.R index 80ab5bf9..bdf17323 100644 --- a/R/DataDiscoveryRetrieval.R +++ b/R/DataDiscoveryRetrieval.R @@ -498,7 
+498,7 @@ TADA_DataRetrieval <- function(startDate = "null", # Get site metadata sites.DR <- clipped_sites_sf %>% as_tibble() %>% - select(-geometry) + dplyr::select(-geometry) # Get project metadata projects.DR <- suppressMessages( @@ -562,7 +562,7 @@ TADA_DataRetrieval <- function(startDate = "null", # Get site metadata sites.DR <- clipped_sites_sf %>% as_tibble() %>% - select(-geometry) + dplyr::select(-geometry) # Get project metadata projects.DR <- suppressMessages( From 21cf9c5580db099f02dc013b4cc5ac9b9ad6100f Mon Sep 17 00:00:00 2001 From: Matt Brousil <37380883+mbrousil@users.noreply.github.com> Date: Fri, 10 Jan 2025 11:31:39 -0800 Subject: [PATCH 16/35] Update DataDiscoveryRetrieval.R --- R/DataDiscoveryRetrieval.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/DataDiscoveryRetrieval.R b/R/DataDiscoveryRetrieval.R index bdf17323..fa4ef4d0 100644 --- a/R/DataDiscoveryRetrieval.R +++ b/R/DataDiscoveryRetrieval.R @@ -497,7 +497,7 @@ TADA_DataRetrieval <- function(startDate = "null", # Get site metadata sites.DR <- clipped_sites_sf %>% - as_tibble() %>% + dplyr::as_tibble() %>% dplyr::select(-geometry) # Get project metadata @@ -561,7 +561,7 @@ TADA_DataRetrieval <- function(startDate = "null", # Get site metadata sites.DR <- clipped_sites_sf %>% - as_tibble() %>% + dplyr::as_tibble() %>% dplyr::select(-geometry) # Get project metadata From 152e979eb7fbe0a6f75b11734313d8b4afc2af4e Mon Sep 17 00:00:00 2001 From: Matt Brousil <37380883+mbrousil@users.noreply.github.com> Date: Fri, 17 Jan 2025 12:16:24 -0800 Subject: [PATCH 17/35] http errors --- R/DataDiscoveryRetrieval.R | 90 +++++++++++++++++++++++++++++--------- 1 file changed, 69 insertions(+), 21 deletions(-) diff --git a/R/DataDiscoveryRetrieval.R b/R/DataDiscoveryRetrieval.R index fa4ef4d0..5817e519 100644 --- a/R/DataDiscoveryRetrieval.R +++ b/R/DataDiscoveryRetrieval.R @@ -249,15 +249,15 @@ TADA_DataRetrieval <- function(startDate = "null", stop("A tribal_area_type is required 
if tribe_name_parcel is provided.") } - # Set query parameters - WQPquery <- list() - # If an sf object OR tribal info are provided they will be the basis of the query # (The tribal data handling uses sf objects as well) if( (!is.null(aoi_sf) & inherits(aoi_sf, "sf")) | (tribal_area_type != "null") ){ # Build the non-sf part of the query: + # Set query parameters + WQPquery <- list() + # StartDate if (length(startDate) > 1) { if (is.na(suppressWarnings(lubridate::parse_date_time(startDate[1], orders = "ymd")))) { @@ -409,25 +409,55 @@ TADA_DataRetrieval <- function(startDate = "null", input_bbox <- sf::st_bbox(aoi_sf) # Query info on available data within the bbox - bbox_avail <- suppressMessages( - dataRetrieval::whatWQPdata( - WQPquery, - bBox = c(input_bbox$xmin, input_bbox$ymin, input_bbox$xmax, input_bbox$ymax) - ) + # Don't want to print every message that's returned by WQP + quiet_whatWQPdata <- purrr::quietly(dataRetrieval::whatWQPdata) + + # Try getting WQP info + quiet_bbox_avail <- quiet_whatWQPdata( + WQPquery, + bBox = c(input_bbox$xmin, input_bbox$ymin, input_bbox$xmax, input_bbox$ymax) ) + # Alert & stop if an http error was received + if(is.null(quiet_bbox_avail$result)){ + + stop_message <- quiet_bbox_avail$messages %>% + grep(pattern = "failed|HTTP", x = ., ignore.case = FALSE, value = TRUE) %>% + paste("\n", ., collapse = "") %>% + paste("The WQP request returned a NULL with the following message(s): \n", + ., + collapse = "\n") + + stop(stop_message) + + } + + # Use result only + bbox_avail <- quiet_bbox_avail$result + # Check if any sites are within the aoi if ( (nrow(bbox_avail) > 0 ) == FALSE) { stop("No monitoring sites were returned within your area of interest (no data available).") } + quiet_bbox_sites <- quiet_whatWQPdata( + siteid = bbox_avail$MonitoringLocationIdentifier + ) + + if(is.null(quiet_bbox_sites$result)){ + + stop_message <- quiet_bbox_sites$messages %>% + grep(pattern = "failed|HTTP", x = ., ignore.case = FALSE, value = 
TRUE) %>% + paste("\n", ., collapse = "") %>% + paste("The WQP request returned a NULL with the following message(s): \n", + ., + collapse = "\n") + stop(stop_message) + + } + # Reformat returned info as sf - bbox_sites_sf <- suppressMessages( - dataRetrieval::whatWQPsites( - siteid = bbox_avail$MonitoringLocationIdentifier - ) - ) %>% - TADA_MakeSpatial(., crs = 4326) + bbox_sites_sf <- TADA_MakeSpatial(quiet_bbox_sites$result, crs = 4326) # Subset sites to only within shapefile and get IDs clipped_sites_sf <- bbox_sites_sf[aoi_sf, ] @@ -516,7 +546,7 @@ TADA_DataRetrieval <- function(startDate = "null", Sites = sites.DR, Projects = projects.DR ) %>% dplyr::mutate( - across(tidyselect::everything(), as.character) + dplyr::across(tidyselect::everything(), as.character) ) # run TADA_AutoClean function @@ -580,7 +610,7 @@ TADA_DataRetrieval <- function(startDate = "null", Sites = sites.DR, Projects = projects.DR ) %>% dplyr::mutate( - across(tidyselect::everything(), as.character) + dplyr::across(tidyselect::everything(), as.character) ) # Run TADA_AutoClean function @@ -707,7 +737,25 @@ TADA_DataRetrieval <- function(startDate = "null", } # Query info on available data - query_avail <- suppressMessages(dataRetrieval::whatWQPdata(WQPquery)) + # Don't want to print every message that's returned by WQP + quiet_whatWQPdata <- purrr::quietly(dataRetrieval::whatWQPdata) + + quiet_query_avail <- quiet_whatWQPdata(WQPquery) + + if(is.null(quiet_query_avail$result)){ + + stop_message <- quiet_query_avail$messages %>% + grep(pattern = "failed|HTTP", x = ., ignore.case = FALSE, value = TRUE) %>% + paste("\n", ., collapse = "") %>% + paste("The WQP request returned a NULL with the following message(s): \n", + ., + collapse = "\n") + + stop(stop_message) + + } + + query_avail <- quiet_query_avail$result site_count <- length(query_avail$MonitoringLocationIdentifier) @@ -771,7 +819,7 @@ TADA_DataRetrieval <- function(startDate = "null", Sites = sites.DR, Projects = projects.DR ) 
%>% dplyr::mutate( - across(tidyselect::everything(), as.character) + dplyr::across(tidyselect::everything(), as.character) ) # run TADA_AutoClean function @@ -816,7 +864,7 @@ TADA_DataRetrieval <- function(startDate = "null", Sites = sites.DR, Projects = projects.DR ) %>% dplyr::mutate( - across(tidyselect::everything(), as.character) + dplyr::across(tidyselect::everything(), as.character) ) # run TADA_AutoClean function @@ -977,7 +1025,7 @@ TADA_BigDataHelper <- function(record_summary, WQPquery, maxrecs = 250000, maxsi ignore_attributes = TRUE ) ) %>% - dplyr::mutate(across(everything(), as.character)) + dplyr::mutate(dplyr::across(everything(), as.character)) # If data is returned, stack with what's already been retrieved if (dim(results_small)[1] > 0) { @@ -1018,7 +1066,7 @@ TADA_BigDataHelper <- function(record_summary, WQPquery, maxrecs = 250000, maxsi ignore_attributes = TRUE ) )%>% - dplyr::mutate(across(everything(), as.character)) + dplyr::mutate(dplyr::across(everything(), as.character)) if (dim(results_big)[1] > 0) { df_big <- dplyr::bind_rows(df_big, results_big) From fd3019faa910ab4cc5c2cf66a29861be5603a3ac Mon Sep 17 00:00:00 2001 From: Matt Brousil <37380883+mbrousil@users.noreply.github.com> Date: Fri, 17 Jan 2025 15:54:01 -0800 Subject: [PATCH 18/35] TADA_TribalOptions + vignette + logic --- R/DataDiscoveryRetrieval.R | 100 +++++++++++++++++++++++++++++++++---- vignettes/TADAModule1.Rmd | 23 ++++++++- 2 files changed, 110 insertions(+), 13 deletions(-) diff --git a/R/DataDiscoveryRetrieval.R b/R/DataDiscoveryRetrieval.R index 5817e519..f972d2d5 100644 --- a/R/DataDiscoveryRetrieval.R +++ b/R/DataDiscoveryRetrieval.R @@ -205,7 +205,7 @@ TADA_DataRetrieval <- function(startDate = "null", # If both an sf object and tribe information are provided it's unclear what # the priority should be for the query if( !is.null(aoi_sf) & - ( (tribal_area_type != "null") | (tribe_name_parcel != "null") ) ){ + any( (tribal_area_type != "null") | (tribe_name_parcel 
!= "null") )){ stop( paste0( "Both sf data and tribal information have been provided. ", @@ -245,7 +245,7 @@ TADA_DataRetrieval <- function(startDate = "null", } # Insufficient tribal info provided - if( (tribal_area_type == "null") & (tribe_name_parcel != "null") ){ + if( (tribal_area_type == "null") & all(tribe_name_parcel != "null") ){ stop("A tribal_area_type is required if tribe_name_parcel is provided.") } @@ -365,15 +365,15 @@ TADA_DataRetrieval <- function(startDate = "null", ){ # Get the relevant url - aoi_sf <- filter(map_service_urls, - tribal_area == tribal_area_type)$url %>% + aoi_sf <- dplyr::filter(map_service_urls, + tribal_area == tribal_area_type)$url %>% # Pull data arcgislayers::arc_open() %>% # Return sf arcgislayers::arc_select() %>% # If a value provided, then filter - {if ((tribe_name_parcel != "null") & (tribe_name_parcel != "null")) { - filter(., TRIBE_NAME %in% tribe_name_parcel) + {if (all(tribe_name_parcel != "null") ) { + dplyr::filter(., TRIBE_NAME %in% tribe_name_parcel) } else { . }} @@ -381,12 +381,12 @@ TADA_DataRetrieval <- function(startDate = "null", # Otherwise filter by PARCEL_NO (Note that values in this col are not unique) } else if(tribal_area_type == "Alaska Native Allotments"){ - aoi_sf <- filter(map_service_urls, - tribal_area == tribal_area_type)$url %>% + aoi_sf <- dplyr::filter(map_service_urls, + tribal_area == tribal_area_type)$url %>% arcgislayers::arc_open() %>% arcgislayers::arc_select() %>% - {if ((tribe_name_parcel != "null") & (tribe_name_parcel != "null")) { - filter(., PARCEL_NO %in% tribe_name_parcel) + {if (all(tribe_name_parcel != "null")) { + dplyr::filter(., PARCEL_NO %in% tribe_name_parcel) } else { . 
}} @@ -440,7 +440,10 @@ TADA_DataRetrieval <- function(startDate = "null", stop("No monitoring sites were returned within your area of interest (no data available).") } - quiet_bbox_sites <- quiet_whatWQPdata( + + quiet_whatWQPsites <- purrr::quietly(dataRetrieval::whatWQPsites) + + quiet_bbox_sites <- quiet_whatWQPsites( siteid = bbox_avail$MonitoringLocationIdentifier ) @@ -882,6 +885,81 @@ TADA_DataRetrieval <- function(startDate = "null", } } +#' Access options available for querying tribal spatial data with `TADA_DataRetrieval()`. +#' +#' @description +#' This function provides access to [six layer datasets](https://geopub.epa.gov/arcgis/rest/services/EMEF/Tribal/MapServer) +#' containing spatial data related to tribal lands: "Alaska Native Allotments", +#' "Alaska Native Villages", "American Indian Reservations", "Off-reservation Trust Lands", +#' "Oklahoma Tribal Statistical Areas", and "Virginia Federally Recognized Tribes". +#' These datasets are used by `TADA_DataRetrieval()` when retrieving spatial data +#' for tribal lands specified by the user. +#' +#' The purpose of `TADA_TribalOptions()` is to allow the user to review the available +#' data in those datasets and identify the records they would like to query with +#' `TADA_DataRetrieval()`. +#' +#' An interactive map of the six layer datasets is available on ArcGIS Online Map +#' Viewer here: https://www.arcgis.com/apps/mapviewer/index.html?url=https://geopub.epa.gov/arcgis/rest/services/EMEF/Tribal/MapServer&source=sd +#' +#' @param tribal_area_type A character string. Must be one of the six tribal +#' spatial layers: "Alaska Native Allotments", "Alaska Native Villages", +#' "American Indian Reservations", "Off-reservation Trust Lands", +#' "Oklahoma Tribal Statistical Areas", or "Virginia Federally Recognized Tribes". +#' +#' @param return_sf Logical. Should the function return the dataset as an `sf` +#' object (TRUE) or a data frame (FALSE)? Defaults to FALSE. 
+#' +#' @returns A data frame or `sf` object containing the specified layer from the EPA +#' Map Service. +#' +#' @note +#' Alaska Native Villages and Virginia Federally Recognized Tribes are point +#' geometries in the Map Service, not polygons. At the time of this writing they +#' do not return any data when used for WQP bbox queries. +#' +#' @export +#' +#' @seealso [TADA_DataRetrieval()] +#' + +TADA_TribalOptions <- function(tribal_area_type, return_sf = FALSE){ + + # Make a reference table for tribal area type + url matching + map_service_urls <- tibble::tribble( + ~tribal_area, ~url, + "Alaska Native Allotments", "https://geopub.epa.gov/arcgis/rest/services/EMEF/Tribal/MapServer/0", + "Alaska Native Villages", "https://geopub.epa.gov/arcgis/rest/services/EMEF/Tribal/MapServer/1", + "American Indian Reservations", "https://geopub.epa.gov/arcgis/rest/services/EMEF/Tribal/MapServer/2", + "Off-reservation Trust Lands", "https://geopub.epa.gov/arcgis/rest/services/EMEF/Tribal/MapServer/3", + "Oklahoma Tribal Statistical Areas", "https://geopub.epa.gov/arcgis/rest/services/EMEF/Tribal/MapServer/4", + "Virginia Federally Recognized Tribes", "https://geopub.epa.gov/arcgis/rest/services/EMEF/Tribal/MapServer/5" + ) + + # Confirm usable string provided + if( !(tribal_area_type %in% map_service_urls$tribal_area) ){ + stop("tribal_area_type must match one of the six tribal spatial layer names.") + } + + # Query Map Service + tribal_area_sf <- dplyr::filter(map_service_urls, + tribal_area == tribal_area_type)$url %>% + arcgislayers::arc_open() %>% + # Return sf + arcgislayers::arc_select() %>% + sf::st_make_valid() + + # Convert to df if needed, export + if(return_sf == FALSE){ + return( + as.data.frame(tribal_area_sf) %>% + sf::st_drop_geometry() + ) + } else { + return(tribal_area_sf) + } + +} #' Read in WQP data using the Water Quality Portal (WQP) web services #' diff --git a/vignettes/TADAModule1.Rmd b/vignettes/TADAModule1.Rmd index 96ed73f2..e7aafc03 100644 --- 
a/vignettes/TADAModule1.Rmd +++ b/vignettes/TADAModule1.Rmd @@ -93,7 +93,7 @@ package functions. It joins three WQP profiles: Site, Sample Results data in the Characteristic, Speciation, Fraction, and Unit fields to uppercase and addresses result values that include special characters. -This function uses the same inputs as the dataRetrieval `readWQPdata` +This function accepts the same inputs as the dataRetrieval `readWQPdata` function. `readWQPdata` does not restrict the characteristics pulled from [Water Quality Portal (WQP)](https://www.waterqualitydata.us/). @@ -124,6 +124,16 @@ Data retrieval filters include: - characteristicType +In addition to these filters, TADA_DataRetrieval accepts additional +geospatial-related filters that are not included in the dataRetrieval +`readWQPdata` function: + +- aoi_sf + +- tribal_area_type + +- tribe_name_parcel + The default TADA_DataRetrieval function automatically runs the **TADA_AutoClean** function. In this example, we will set **TADA_AutoClean = FALSE** and run it as a separate step in the work @@ -159,6 +169,15 @@ Tips: identifiers for monitoring locations within USGS NWIS or EPA's WQX databases separately. +3. The aoi_sf and tribal arguments are meant to be used on their own. + For example, if both an aoi_sf argument and tribal information are + provided an error is returned because it's unclear what the priority + location should be for the query. Similarly, aoi_sf and + tribal_area_type are not meant to be used with location-related + filters (e.g., statecode, siteid). In these instances a warning is + returned but the query proceeds by using only the aoi_sf or + tribal_area_type information. + Additional resources: - Review function documentation by entering the following code into @@ -201,7 +220,7 @@ du Lac Band), 5) Pueblo of Tesuque, and 6) The Chickasaw Nation We will move forward with this example in the remainder of the vignette. 
```{r TADA_DataRetrieval} -TADAProfile <- TADA_DataRetrieval(organization = c("REDLAKE_WQX", "SFNOES_WQX", "PUEBLO_POJOAQUE", "FONDULAC_WQX", "PUEBLOOFTESUQUE", "CNENVSER"), startDate = "2018-01-01", endDate = "2019-01-01", applyautoclean = FALSE) +TADAProfile <- TADA_DataRetrieval(organization = c("REDLAKE_WQX", "SFNOES_WQX", "PUEBLO_POJOAQUE", "FONDULAC_WQX", "PUEBLOOFTESUQUE", "CNENVSER"), startDate = "2018-01-01", endDate = "2019-01-01", applyautoclean = FALSE, ask = FALSE) ``` ## USGS dataRetrieval From a8dcb05d67dc878128cb3b70f536c9fbcce973dc Mon Sep 17 00:00:00 2001 From: Matt Brousil <37380883+mbrousil@users.noreply.github.com> Date: Tue, 21 Jan 2025 14:16:20 -0800 Subject: [PATCH 19/35] Vignette updates --- R/DataDiscoveryRetrieval.R | 2 +- vignettes/TADAModule1.Rmd | 85 ++++++++++++++++++++++++++++++++++---- 2 files changed, 78 insertions(+), 9 deletions(-) diff --git a/R/DataDiscoveryRetrieval.R b/R/DataDiscoveryRetrieval.R index f972d2d5..8e8f9859 100644 --- a/R/DataDiscoveryRetrieval.R +++ b/R/DataDiscoveryRetrieval.R @@ -54,7 +54,7 @@ #' @param organization A string of letters and/or numbers (some additional characters also possible) used to signify an organization with data in the Water Quality Portal. See https://www.waterqualitydata.us/Codes/organization for options. #' @param project A string of letters and/or numbers (some additional characters also possible) used to signify a project with data in the Water Quality Portal. See https://www.waterqualitydata.us/Codes/project for options. #' @param providers Leave blank to include all, or specify "STEWARDS", "STORET" (i.e., WQX), and/or "NWIS". See https://www.waterqualitydata.us/Codes/providers for options. -#' @param maxrecs Maximum number of records to query at once. +#' @param maxrecs Maximum number of records to query at once (i.e., without breaking into smaller queries). #' @param ask A logical value indicating whether the user should be asked for approval before downloads begin. 
#' @param applyautoclean Logical, defaults to TRUE. Applies TADA_AutoClean function on the returned data profile. Suggest switching to FALSE for queries that are expected to be large. #' diff --git a/vignettes/TADAModule1.Rmd b/vignettes/TADAModule1.Rmd index e7aafc03..23042ed5 100644 --- a/vignettes/TADAModule1.Rmd +++ b/vignettes/TADAModule1.Rmd @@ -196,7 +196,7 @@ Additional resources: Tutorial](https://waterdata.usgs.gov/blog/dataretrieval/) Use the code below to download data from the WQP using -TADA_DataRetrieval. Edit the code chuck below to define your own WQP +TADA_DataRetrieval. Edit the code chunk below to define your own WQP query inputs. Downloads using TADA_DataRetrieval will have the same columns each time, @@ -217,12 +217,74 @@ This example includes monitoring data collected from Jan 2018 to Jan & Fox Nation, 3) Pueblo of Pojoaque, 4) Minnesota Chippewa Tribe (Fond du Lac Band), 5) Pueblo of Tesuque, and 6) The Chickasaw Nation -We will move forward with this example in the remainder of the vignette. - ```{r TADA_DataRetrieval} TADAProfile <- TADA_DataRetrieval(organization = c("REDLAKE_WQX", "SFNOES_WQX", "PUEBLO_POJOAQUE", "FONDULAC_WQX", "PUEBLOOFTESUQUE", "CNENVSER"), startDate = "2018-01-01", endDate = "2019-01-01", applyautoclean = FALSE, ask = FALSE) ``` +We will move forward with this example in the remainder of the vignette. + +We will first use a subset of this example to demonstrate using new +TADA_DataRetrieval options that allow for **spatial** or +**tribe-specific** queries: + +Focusing just on the "PUEBLO_POJOAQUE" organization, rerun the example +above: + +```{r TADA_DataRetrieval_single} +TADAProfile_single <- TADA_DataRetrieval( + organization = "PUEBLO_POJOAQUE", + startDate = "2018-01-01", + endDate = "2019-01-01", + applyautoclean = FALSE, + ask = FALSE +) +``` + +The same results can now be obtained using a combination of the +tribal_area_type and tribe_name_parcel arguments. Both must be used +together. 
The tribal_area_type argument indicates which one of the [six +layer +datasets](https://geopub.epa.gov/arcgis/rest/services/EMEF/Tribal/MapServer) +of tribal land data to query within. tribe_name_parcel is the specific +name of the tribal land of interest as listed in the layer. (Running +TADA_TribalOptions("American Indian Reservations") could be used here to +determine the correct spelling for this argument, as listed in the +TRIBE_NAME column.) + +```{r TADA_DataRetrieval_tribal} +TADAProfile_tribal <- TADA_DataRetrieval( + tribal_area_type = "American Indian Reservations", + tribe_name_parcel = "Pueblo of Pojoaque, New Mexico", + startDate = "2018-01-01", + endDate = "2019-01-01", + applyautoclean = FALSE, + ask = FALSE +) +# After removing attribute metadata they are equivalent: +all.equal(data.frame(TADAProfile_single), data.frame(TADAProfile_tribal)) +``` + +Additionally, the aoi_sf argument can be used to provide an sf spatial object +as a query filter. We can match the output of the two short examples above like so: + +```{r TADA_DataRetrieval_spatial} +TADAProfile_spatial <- TADA_DataRetrieval( + aoi_sf = tigris::native_areas() %>% filter(NAMELSAD == "Pueblo of Pojoaque"), + startDate = "2018-01-01", + endDate = "2019-01-01", + applyautoclean = FALSE, + ask = FALSE +) + +all.equal(data.frame(TADAProfile_single), data.frame(TADAProfile_spatial)) +``` + +**Note**: In this example the output data is identical from these three input +methods. However, in some instances this may not be the case. This is because +the tribal_area_type method is based on spatial data and so spatial boundaries +must be taken into account when comparing query results. The same applies when +using aoi_sf results. + ## USGS dataRetrieval Uncomment below (optional) if you would like to review differences @@ -236,19 +298,26 @@ as part of the data retrieval process. 
# dataRetrieval_example <- dataRetrieval::readWQPdata(organization = c("REDLAKE_WQX", "SFNOES_WQX", "PUEBLO_POJOAQUE", "FONDULAC_WQX", "PUEBLOOFTESUQUE", "CNENVSER"), startDate = "2018-01-01", endDate = "2019-01-01", ignore_attributes = TRUE) ``` -## TADA_BigDataRetrieval +## Big Data Queries If you need to download a large amount of data from across a large area, -and the TADA_DataRetrieval function is not working due to WQP timeout -issues, then the **TADA_BigDataRetrieval** function may work better. +the TADA_DataRetrieval function now handles this automatically. Whereas in the +past there was a second function (TADA_BigDataRetrieval) to do this, the +standard TADA_DataRetrieval function now checks the number of results in each +query and uses similar methods as TADA_BigDataRetrieval when necessary. -This function does multiple synchronous data calls to the WQP +The function does multiple synchronous data calls to the WQP (waterqualitydata.us). It uses the WQP summary service to limit the sites downloaded to only those with relevant data. It pulls back data from set number of stations at a time and then joins the data back together to produce a single TADA compatible dataframe as the output. -See ?TADA_BigDataRetrieval for more details. WARNING, some of the +TADA_DataRetrieval now also prompts the user (when ask = TRUE) to confirm that +they want to download the dataset. As part of this prompt the expected number +of rows of data are provided to help in making the decision. As the downloads +occur, a progress bar is shown as well. + +See ?TADA_DataRetrieval for more details. WARNING, some of the examples below can take multiple HOURS to run. The total run time depends on your query inputs. From d8fe5f3b60893948b064cac7998162609c65187c Mon Sep 17 00:00:00 2001 From: Matt Brousil <37380883+mbrousil@users.noreply.github.com> Date: Wed, 22 Jan 2025 15:21:18 -0800 Subject: [PATCH 20/35] Update testing, build, etc. 
--- .Rbuildignore | 2 + .gitignore | 2 + NAMESPACE | 1 + R/Utilities.R | 6 ++- man/TADA_DataRetrieval.Rd | 5 +++ man/TADA_TribalOptions.Rd | 44 ++++++++++++++++++++ man/ask_user.Rd | 17 ++++++++ tests/testthat/test-CensoredDataSuite.R | 7 ++-- tests/testthat/test-DataDiscoveryRetrieval.R | 34 ++++++++------- tests/testthat/test-ResultFlagsDependent.R | 2 +- tests/testthat/test-Tables.R | 2 +- tests/testthat/test-UnitConversions.R | 3 +- vignettes/TADAModule1.Rmd | 10 ++--- vignettes/TADAModule2.Rmd | 3 +- 14 files changed, 108 insertions(+), 30 deletions(-) create mode 100644 man/TADA_TribalOptions.Rd create mode 100644 man/ask_user.Rd diff --git a/.Rbuildignore b/.Rbuildignore index 0b0fbfc0..4d9bb147 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -9,3 +9,5 @@ readme.html .gitignore ^articles ^vignettes/articles$ +^doc$ +^Meta$ diff --git a/.gitignore b/.gitignore index ffaf1f85..c109a3ac 100644 --- a/.gitignore +++ b/.gitignore @@ -26,3 +26,5 @@ testing_log.txt # test data from AK AK_EPATADA_troubleshooting +/doc/ +/Meta/ diff --git a/NAMESPACE b/NAMESPACE index 9bd8f6cb..0b286c45 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -74,6 +74,7 @@ export(TADA_SimpleCensoredMethods) export(TADA_Stats) export(TADA_SubstituteDeprecatedChars) export(TADA_SummarizeColumn) +export(TADA_TribalOptions) export(TADA_TwoCharacteristicScatterplot) export(TADA_UniqueCharUnitSpeciation) export(TADA_ViewATTAINS) diff --git a/R/Utilities.R b/R/Utilities.R index c1ca0f94..efa91f3b 100644 --- a/R/Utilities.R +++ b/R/Utilities.R @@ -964,7 +964,8 @@ TADA_RandomTestingData <- function(number_of_days = 1, choose_random_state = FAL startDate = as.character(random_start_date), endDate = as.character(end_date), statecode = state, - applyautoclean = TRUE + applyautoclean = TRUE, + ask = FALSE ) } @@ -973,7 +974,8 @@ TADA_RandomTestingData <- function(number_of_days = 1, choose_random_state = FAL startDate = as.character(random_start_date), endDate = as.character(end_date), statecode = state, - 
applyautoclean = FALSE + applyautoclean = FALSE, + ask = FALSE ) } diff --git a/man/TADA_DataRetrieval.Rd b/man/TADA_DataRetrieval.Rd index 24b30c87..bcc02ad1 100644 --- a/man/TADA_DataRetrieval.Rd +++ b/man/TADA_DataRetrieval.Rd @@ -23,6 +23,7 @@ TADA_DataRetrieval( project = "null", providers = "null", maxrecs = 250000, + ask = TRUE, applyautoclean = TRUE ) } @@ -61,6 +62,10 @@ TADA_DataRetrieval( \item{providers}{Leave blank to include all, or specify "STEWARDS", "STORET" (i.e., WQX), and/or "NWIS". See https://www.waterqualitydata.us/Codes/providers for options.} +\item{maxrecs}{Maximum number of records to query at once (i.e., without breaking into smaller queries).} + +\item{ask}{A logical value indicating whether the user should be asked for approval before downloads begin.} + \item{applyautoclean}{Logical, defaults to TRUE. Applies TADA_AutoClean function on the returned data profile. Suggest switching to FALSE for queries that are expected to be large.} } \value{ diff --git a/man/TADA_TribalOptions.Rd b/man/TADA_TribalOptions.Rd new file mode 100644 index 00000000..2cf6bb37 --- /dev/null +++ b/man/TADA_TribalOptions.Rd @@ -0,0 +1,44 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/DataDiscoveryRetrieval.R +\name{TADA_TribalOptions} +\alias{TADA_TribalOptions} +\title{Access options available for querying tribal spatial data with \code{TADA_DataRetrieval()}.} +\usage{ +TADA_TribalOptions(tribal_area_type, return_sf = FALSE) +} +\arguments{ +\item{tribal_area_type}{A character string. Must be one of the six tribal +spatial layers: "Alaska Native Allotments", "Alaska Native Villages", +"American Indian Reservations", "Off-reservation Trust Lands", +"Oklahoma Tribal Statistical Areas", or "Virginia Federally Recognized Tribes".} + +\item{return_sf}{Logical. Should the function return the dataset as an \code{sf} +object (TRUE) or a data frame (FALSE)? 
Defaults to FALSE.} +} +\value{ +A data frame or \code{sf} object containing the specified layer from the EPA +Map Service. +} +\description{ +This function provides access to \href{https://geopub.epa.gov/arcgis/rest/services/EMEF/Tribal/MapServer}{six layer datasets} +containing spatial data related to tribal lands: "Alaska Native Allotments", +"Alaska Native Villages", "American Indian Reservations", "Off-reservation Trust Lands", +"Oklahoma Tribal Statistical Areas", and "Virginia Federally Recognized Tribes". +These datasets are used by \code{TADA_DataRetrieval()} when retrieving spatial data +for tribal lands specified by the user. + +The purpose of \code{TADA_TribalOptions()} is to allow the user to review the available +data in those datasets and identify the records they would like to query with +\code{TADA_DataRetrieval()}. + +An interactive map of the six layer datasets is available on ArcGIS Online Map +Viewer here: https://www.arcgis.com/apps/mapviewer/index.html?url=https://geopub.epa.gov/arcgis/rest/services/EMEF/Tribal/MapServer&source=sd +} +\note{ +Alaska Native Villages and Virginia Federally Recognized Tribes are point +geometries in the Map Service, not polygons. At the time of this writing they +do not return any data when used for WQP bbox queries. 
+} +\seealso{ +\code{\link[=TADA_DataRetrieval]{TADA_DataRetrieval()}} +} diff --git a/man/ask_user.Rd b/man/ask_user.Rd new file mode 100644 index 00000000..85380683 --- /dev/null +++ b/man/ask_user.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/DataDiscoveryRetrieval.R +\name{ask_user} +\alias{ask_user} +\title{Ask user to approve WQP downloads} +\usage{ +ask_user(n_records) +} +\arguments{ +\item{n_records}{A numeric value indicating the number of records that will be downloaded from the WQP if the user decides to proceed.} +} +\description{ +Once record counts have been retrieved from the Water Quality Portal (WQP) for +a query, this function is used to prompt the user to decide (i.e., "yes"/"no") +whether the download should proceed. The user is also reminded of the limits of +Microsoft Excel for row counts as a comparison. +} diff --git a/tests/testthat/test-CensoredDataSuite.R b/tests/testthat/test-CensoredDataSuite.R index 8ad21839..644b99d4 100644 --- a/tests/testthat/test-CensoredDataSuite.R +++ b/tests/testthat/test-CensoredDataSuite.R @@ -1,10 +1,10 @@ test_that("TADA_IDCensoredData orphans", { - cens.check <- TADA_DataRetrieval(statecode = "CO", startDate = "2021-01-01", endDate = "2022-01-01", characteristicName = c("Phosphorus", "Nitrate")) + cens.check <- TADA_DataRetrieval(statecode = "CO", startDate = "2021-01-01", endDate = "2022-01-01", characteristicName = c("Phosphorus", "Nitrate"), ask = FALSE) expect_true(all(!is.na(cens.check$TADA.CensoredData.Flag))) }) test_that("TADA_SimpleCensoredMethods doesn't drop data", { - testdat <- TADA_DataRetrieval(statecode = "KS", startDate = "2021-01-01", endDate = "2022-01-01", characteristicName = c("Phosphorus", "Nitrate")) + testdat <- TADA_DataRetrieval(statecode = "KS", startDate = "2021-01-01", endDate = "2022-01-01", characteristicName = c("Phosphorus", "Nitrate"), ask = FALSE) cens.check <- TADA_SimpleCensoredMethods(testdat) 
expect_equal(dim(testdat)[1], dim(cens.check)[1]) }) @@ -41,7 +41,8 @@ test_that("TADA_IDCensoredData copies det lim values to result values if applica test_that("TADA_IDCensoredData correctly handles specific text values such as ND", { df <- TADA_DataRetrieval( startDate = "2022-12-19", - endDate = "2022-12-20" + endDate = "2022-12-20", + ask = FALSE ) df1 <- TADA_IDCensoredData(df) diff --git a/tests/testthat/test-DataDiscoveryRetrieval.R b/tests/testthat/test-DataDiscoveryRetrieval.R index 55ea2c37..bd96696a 100644 --- a/tests/testthat/test-DataDiscoveryRetrieval.R +++ b/tests/testthat/test-DataDiscoveryRetrieval.R @@ -270,7 +270,8 @@ test_that("TADA_DataRetrieval", { "USGS-054064785", "USGS-430305089260600" ), - characteristicName = "Phosphorus" + characteristicName = "Phosphorus", + ask = FALSE ) # you could just pick the important columns: expect_true(all(c( @@ -405,7 +406,8 @@ test_that("TADA_DataRetrieval", { check_autoclean_meters_works <- TADA_DataRetrieval( statecode = "UT", characteristicName = c("Ammonia", "Nitrate", "Nitrogen"), - startDate = "2021-01-01" + startDate = "2021-01-01", + ask = FALSE ) expect_false("meters" %in% check_autoclean_meters_works$TADA.ResultMeasure.MeasureUnitCode) }) @@ -413,22 +415,22 @@ test_that("TADA_DataRetrieval", { # Testing that regular and big data retrieval return the same number of rows on an identical query. 
# cm edited to include start date on 2/27/23 because without this it takes too long to run # these tests, and may time out -test_that("Reg&BigdataRetrieval", { - big <- TADA_BigDataRetrieval(characteristicName = "Algae, substrate rock/bank cover (choice list)", sampleMedia = "Water", siteType = "Stream", startDate = "2020-01-01", applyautoclean = FALSE) - reg <- TADA_DataRetrieval(characteristicName = "Algae, substrate rock/bank cover (choice list)", sampleMedia = "Water", siteType = "Stream", startDate = "2020-01-01", applyautoclean = FALSE) - - expect_equal(nrow(big), nrow(reg)) -}) +# test_that("Reg&BigdataRetrieval", { +# # big <- TADA_BigDataRetrieval(characteristicName = "Algae, substrate rock/bank cover (choice list)", sampleMedia = "Water", siteType = "Stream", startDate = "2020-01-01", applyautoclean = FALSE) +# reg <- TADA_DataRetrieval(characteristicName = "Algae, substrate rock/bank cover (choice list)", sampleMedia = "Water", siteType = "Stream", startDate = "2020-01-01", applyautoclean = FALSE, ask = FALSE) +# +# expect_equal(nrow(big), nrow(reg)) +# }) # Testing that dates work correctly in queries in big data retrieval -test_that("BigdataRetrieval_daterange", { - startDate <- "2018-10-01" - endDate <- "2021-09-30" - big <- TADA_BigDataRetrieval(startDate = startDate, endDate = endDate, huc = c("04030202", "04030201"), characteristicName = "Escherichia coli", siteType = "Stream") - logic <- min(big$ActivityStartDate) >= as.Date(startDate, format = "%Y-%m-%d") & max(big$ActivityStartDate) <= as.Date(endDate, format = "%Y-%m-%d") - - expect_true(logic) -}) +# test_that("BigdataRetrieval_daterange", { +# startDate <- "2018-10-01" +# endDate <- "2021-09-30" +# # big <- TADA_BigDataRetrieval(startDate = startDate, endDate = endDate, huc = c("04030202", "04030201"), characteristicName = "Escherichia coli", siteType = "Stream") +# logic <- min(big$ActivityStartDate) >= as.Date(startDate, format = "%Y-%m-%d") & max(big$ActivityStartDate) <= as.Date(endDate, 
format = "%Y-%m-%d") +# +# expect_true(logic) +# }) # Testing that the TADA_JoinWQPProfiles() function in DataDiscoveryRetrieval.R diff --git a/tests/testthat/test-ResultFlagsDependent.R b/tests/testthat/test-ResultFlagsDependent.R index 1758c9e4..7e763453 100644 --- a/tests/testthat/test-ResultFlagsDependent.R +++ b/tests/testthat/test-ResultFlagsDependent.R @@ -1,7 +1,7 @@ test_that("No NA's in dependent flag columns", { today <- Sys.Date() twoago <- as.character(today - 2 * 365) - testdat <- TADA_DataRetrieval(statecode = "UT", startDate = twoago, characteristicName = c("Nitrate", "Copper"), sampleMedia = "Water") + testdat <- TADA_DataRetrieval(statecode = "UT", startDate = twoago, characteristicName = c("Nitrate", "Copper"), sampleMedia = "Water", ask = FALSE) testdat <- TADA_ConvertResultUnits(testdat, transform = TRUE) testdat <- suppressWarnings(TADA_FlagFraction(testdat, clean = FALSE, flaggedonly = FALSE)) diff --git a/tests/testthat/test-Tables.R b/tests/testthat/test-Tables.R index 5528758e..49a30910 100644 --- a/tests/testthat/test-Tables.R +++ b/tests/testthat/test-Tables.R @@ -1,5 +1,5 @@ test_that("TADA_Stats suggestions complete", { - testdat <- TADA_DataRetrieval(statecode = "KS", startDate = "2021-01-01", endDate = "2022-01-01", characteristicName = c("Phosphorus", "Nitrate")) + testdat <- TADA_DataRetrieval(statecode = "KS", startDate = "2021-01-01", endDate = "2022-01-01", characteristicName = c("Phosphorus", "Nitrate"), ask = FALSE) check <- TADA_Stats(testdat) expect_true(all(!is.na(check$ND_Estimation_Method))) }) diff --git a/tests/testthat/test-UnitConversions.R b/tests/testthat/test-UnitConversions.R index 1a09abc7..5a12e63c 100644 --- a/tests/testthat/test-UnitConversions.R +++ b/tests/testthat/test-UnitConversions.R @@ -63,7 +63,8 @@ test_that("TADA_ConvertDepthUnits converts meters to m", { organization = "USGS-UT", characteristicName = c("Ammonia", "Nitrate", "Nitrogen"), startDate = "2023-01-01", - endDate = "2023-03-01" + endDate = 
"2023-03-01", + ask = FALSE ) check_depth_meters <- TADA_ConvertDepthUnits(check_depth_meters) expect_false("meters" %in% check_depth_meters$TADA.ActivityDepthHeightMeasure.MeasureUnitCode) diff --git a/vignettes/TADAModule1.Rmd b/vignettes/TADAModule1.Rmd index 23042ed5..8154041b 100644 --- a/vignettes/TADAModule1.Rmd +++ b/vignettes/TADAModule1.Rmd @@ -269,7 +269,7 @@ as a query filter. We can match the output of the two short examples above like ```{r TADA_DataRetrieval_spatial} TADAProfile_spatial <- TADA_DataRetrieval( - aoi_sf = tigris::native_areas() %>% filter(NAMELSAD == "Pueblo of Pojoaque"), + aoi_sf = tigris::native_areas() %>% dplyr::filter(NAMELSAD == "Pueblo of Pojoaque"), startDate = "2018-01-01", endDate = "2019-01-01", applyautoclean = FALSE, @@ -322,13 +322,13 @@ examples below can take multiple HOURS to run. The total run time depends on your query inputs. ```{r BigdataRetrieval, eval = FALSE, results = 'hide'} -# AK_AL_WaterTemp <- TADA_BigDataRetrieval(startDate = "2000-01-01", endDate = "2022-12-31", characteristicName = "Temperature, water", statecode = c("AK","AL")) +# AK_AL_WaterTemp <- TADA_DataRetrieval(startDate = "2000-01-01", endDate = "2022-12-31", characteristicName = "Temperature, water", statecode = c("AK","AL")) -# AllWaterTemp <- TADA_BigDataRetrieval(characteristicName = "Temperature, water") +# AllWaterTemp <- TADA_DataRetrieval(characteristicName = "Temperature, water") -# AllPhosphorus <- TADA_BigDataRetrieval(characteristicName = "Phosphorus") +# AllPhosphorus <- TADA_DataRetrieval(characteristicName = "Phosphorus") -# AllCT <- TADA_BigDataRetrieval(statecode = "CT") +# AllCT <- TADA_DataRetrieval(statecode = "CT") ``` ## Filter data based on media type diff --git a/vignettes/TADAModule2.Rmd b/vignettes/TADAModule2.Rmd index 97d4677d..a5a9d810 100644 --- a/vignettes/TADAModule2.Rmd +++ b/vignettes/TADAModule2.Rmd @@ -155,7 +155,8 @@ TADA_dataframe <- TADA_DataRetrieval( endDate = "2020-12-31", characteristicName = "pH", 
countycode = "US:08:069", - applyautoclean = TRUE + applyautoclean = TRUE, + ask = FALSE ) ``` From a0b1b4d7c204edefb5903ff81be20bb421d29c13 Mon Sep 17 00:00:00 2001 From: Matt Brousil <37380883+mbrousil@users.noreply.github.com> Date: Wed, 22 Jan 2025 15:54:14 -0800 Subject: [PATCH 21/35] Run styler::style_pkg() --- R/DataDiscoveryRetrieval.R | 559 +++++++++---------- R/Maintenance.R | 8 +- tests/testthat/test-DataDiscoveryRetrieval.R | 4 +- tests/testthat/test-ResultFlagsIndependent.R | 32 +- 4 files changed, 299 insertions(+), 304 deletions(-) diff --git a/R/DataDiscoveryRetrieval.R b/R/DataDiscoveryRetrieval.R index 8e8f9859..67606762 100644 --- a/R/DataDiscoveryRetrieval.R +++ b/R/DataDiscoveryRetrieval.R @@ -5,7 +5,7 @@ #' exceptions of endDate and startDate match the web service call format from the #' online WQP GUI. endDate and startDate match the format suggested in USGS's #' dataRetrieval package (endDate = "YYYY-MM-DD"), which is a more familiar date -#' format for R users than the WQP GUI's endDateHi = "MM-DD-YYYY". aoi_sf, +#' format for R users than the WQP GUI's endDateHi = "MM-DD-YYYY". aoi_sf, #' tribal_area_type, and tribe_name_parcel do not have corresponding inputs in #' the web service. #' @@ -19,7 +19,7 @@ #' of the query fields. #' characteristicName and Characteristic Group also work as an AND, therefore the #' characteristicName must fall within the Characteristic Group when both are entered. -#' +#' #' aoi_sf cannot be used with tribal_area_type. If countrycode, countycode, huc, #' siteid, or statecode are used with aoi_sf or tribal_area_type they will be ignored #' under the assumption that the sf object or tribal location are the intended @@ -65,7 +65,7 @@ #' geometries in the Map Service, not polygons. At the time of this writing they #' do not return any data when used for WQP bbox queries and so are set to return #' errors when used with this function. 
-#' +#' #' @export #' #' @examples @@ -199,13 +199,12 @@ TADA_DataRetrieval <- function(startDate = "null", maxrecs = 250000, ask = TRUE, applyautoclean = TRUE) { - # Check for incomplete or inconsistent inputs: - + # If both an sf object and tribe information are provided it's unclear what # the priority should be for the query - if( !is.null(aoi_sf) & - any( (tribal_area_type != "null") | (tribe_name_parcel != "null") )){ + if (!is.null(aoi_sf) & + any((tribal_area_type != "null") | (tribe_name_parcel != "null"))) { stop( paste0( "Both sf data and tribal information have been provided. ", @@ -213,29 +212,33 @@ TADA_DataRetrieval <- function(startDate = "null", ) ) } - + # Check for other arguments that indicate location. Function will ignore # these inputs but warn the user - if( + if ( # sf object provided - (!is.null(aoi_sf) & inherits(aoi_sf, "sf")) & - # with additional location info - any( (countrycode != "null"), (countycode != "null"), (huc != "null"), - (siteid != "null"), (statecode != "null") ) - ){ + (!is.null(aoi_sf) & inherits(aoi_sf, "sf")) & + # with additional location info + any( + (countrycode != "null"), (countycode != "null"), (huc != "null"), + (siteid != "null"), (statecode != "null") + ) + ) { warning( paste0( "Location information has been provided in addition to an sf object. ", "Only the sf object will be used in the query." ) ) - } else if( + } else if ( # Tribe info provided - (tribal_area_type != "null") & - # with additional location info - any( (countrycode != "null"), (countycode != "null"), (huc != "null"), - (siteid != "null"), (statecode != "null") ) - ){ + (tribal_area_type != "null") & + # with additional location info + any( + (countrycode != "null"), (countycode != "null"), (huc != "null"), + (siteid != "null"), (statecode != "null") + ) + ) { warning( paste0( "Location information has been provided in addition to tribal information. 
", @@ -243,21 +246,20 @@ TADA_DataRetrieval <- function(startDate = "null", ) ) } - + # Insufficient tribal info provided - if( (tribal_area_type == "null") & all(tribe_name_parcel != "null") ){ + if ((tribal_area_type == "null") & all(tribe_name_parcel != "null")) { stop("A tribal_area_type is required if tribe_name_parcel is provided.") } - + # If an sf object OR tribal info are provided they will be the basis of the query # (The tribal data handling uses sf objects as well) - if( (!is.null(aoi_sf) & inherits(aoi_sf, "sf")) | (tribal_area_type != "null") ){ - + if ((!is.null(aoi_sf) & inherits(aoi_sf, "sf")) | (tribal_area_type != "null")) { # Build the non-sf part of the query: - + # Set query parameters WQPquery <- list() - + # StartDate if (length(startDate) > 1) { if (is.na(suppressWarnings(lubridate::parse_date_time(startDate[1], orders = "ymd")))) { @@ -324,12 +326,11 @@ TADA_DataRetrieval <- function(startDate = "null", } WQPquery <- c(WQPquery, endDate = endDate) } - + # sf AOI prep for query - + # If tribe info is provided then grab the corresponding sf object: - if(tribal_area_type != "null"){ - + if (tribal_area_type != "null") { # Make a reference table for tribal area type + url matching # (options that don't return results are commented out for now) map_service_urls <- tibble::tribble( @@ -338,165 +339,167 @@ TADA_DataRetrieval <- function(startDate = "null", # "Alaska Native Villages", "https://geopub.epa.gov/arcgis/rest/services/EMEF/Tribal/MapServer/1", "American Indian Reservations", "https://geopub.epa.gov/arcgis/rest/services/EMEF/Tribal/MapServer/2", "Off-reservation Trust Lands", "https://geopub.epa.gov/arcgis/rest/services/EMEF/Tribal/MapServer/3", - "Oklahoma Tribal Statistical Areas", "https://geopub.epa.gov/arcgis/rest/services/EMEF/Tribal/MapServer/4"# , + "Oklahoma Tribal Statistical Areas", "https://geopub.epa.gov/arcgis/rest/services/EMEF/Tribal/MapServer/4" # , # "Virginia Federally Recognized Tribes", 
"https://geopub.epa.gov/arcgis/rest/services/EMEF/Tribal/MapServer/5" ) - + # Keep to a single type: - if(length(tribal_area_type) > 1){ + if (length(tribal_area_type) > 1) { stop("tribal_area_type must be of length 1.") } - + # These two layers will not return any data when used for bboxes - if(tribal_area_type == "Alaska Native Villages"){ + if (tribal_area_type == "Alaska Native Villages") { stop("Alaska Native Villages data are centroid points, not spatial boundaries.") - } else if(tribal_area_type == "Virginia Federally Recognized Tribes") { + } else if (tribal_area_type == "Virginia Federally Recognized Tribes") { stop("Federally recognized tribal entities in Virginia do not have any available spatial boundaries.") } - + # These area types allow filtering by TRIBE_NAME (unique within each type) - if(tribal_area_type %in% c( + if (tribal_area_type %in% c( # "Alaska Native Villages", "American Indian Reservations", "Off-reservation Trust Lands", - "Oklahoma Tribal Statistical Areas"#, + "Oklahoma Tribal Statistical Areas" # , # "Virginia Federally Recognized Tribes" ) - ){ - + ) { # Get the relevant url - aoi_sf <- dplyr::filter(map_service_urls, - tribal_area == tribal_area_type)$url %>% + aoi_sf <- dplyr::filter( + map_service_urls, + tribal_area == tribal_area_type + )$url %>% # Pull data arcgislayers::arc_open() %>% # Return sf arcgislayers::arc_select() %>% # If a value provided, then filter - {if (all(tribe_name_parcel != "null") ) { - dplyr::filter(., TRIBE_NAME %in% tribe_name_parcel) - } else { - . - }} - + { + if (all(tribe_name_parcel != "null")) { + dplyr::filter(., TRIBE_NAME %in% tribe_name_parcel) + } else { + . 
+ } + } + # Otherwise filter by PARCEL_NO (Note that values in this col are not unique) - } else if(tribal_area_type == "Alaska Native Allotments"){ - - aoi_sf <- dplyr::filter(map_service_urls, - tribal_area == tribal_area_type)$url %>% + } else if (tribal_area_type == "Alaska Native Allotments") { + aoi_sf <- dplyr::filter( + map_service_urls, + tribal_area == tribal_area_type + )$url %>% arcgislayers::arc_open() %>% arcgislayers::arc_select() %>% - {if (all(tribe_name_parcel != "null")) { - dplyr::filter(., PARCEL_NO %in% tribe_name_parcel) - } else { - . - }} - + { + if (all(tribe_name_parcel != "null")) { + dplyr::filter(., PARCEL_NO %in% tribe_name_parcel) + } else { + . + } + } } else { stop("Tribal area type not recognized. Refer to TADA_TribalOptions() for query options.") } - } - + # Check and/or fix geometry aoi_sf <- sf::st_make_valid(aoi_sf) - + # Match CRS - if(sf::st_crs(aoi_sf) != 4326){ + if (sf::st_crs(aoi_sf) != 4326) { aoi_sf <- sf::st_transform(aoi_sf, crs = 4326) } - + # Get bbox of the sf object input_bbox <- sf::st_bbox(aoi_sf) - + # Query info on available data within the bbox # Don't want to print every message that's returned by WQP quiet_whatWQPdata <- purrr::quietly(dataRetrieval::whatWQPdata) - + # Try getting WQP info quiet_bbox_avail <- quiet_whatWQPdata( WQPquery, bBox = c(input_bbox$xmin, input_bbox$ymin, input_bbox$xmax, input_bbox$ymax) ) - + # Alert & stop if an http error was received - if(is.null(quiet_bbox_avail$result)){ - + if (is.null(quiet_bbox_avail$result)) { stop_message <- quiet_bbox_avail$messages %>% grep(pattern = "failed|HTTP", x = ., ignore.case = FALSE, value = TRUE) %>% paste("\n", ., collapse = "") %>% paste("The WQP request returned a NULL with the following message(s): \n", - ., - collapse = "\n") - + ., + collapse = "\n" + ) + stop(stop_message) - } - + # Use result only bbox_avail <- quiet_bbox_avail$result - + # Check if any sites are within the aoi - if ( (nrow(bbox_avail) > 0 ) == FALSE) { + if 
((nrow(bbox_avail) > 0) == FALSE) { stop("No monitoring sites were returned within your area of interest (no data available).") } - - + + quiet_whatWQPsites <- purrr::quietly(dataRetrieval::whatWQPsites) - + quiet_bbox_sites <- quiet_whatWQPsites( siteid = bbox_avail$MonitoringLocationIdentifier ) - - if(is.null(quiet_bbox_sites$result)){ - + + if (is.null(quiet_bbox_sites$result)) { stop_message <- quiet_bbox_sites$messages %>% grep(pattern = "failed|HTTP", x = ., ignore.case = FALSE, value = TRUE) %>% paste("\n", ., collapse = "") %>% paste("The WQP request returned a NULL with the following message(s): \n", - ., - collapse = "\n") + ., + collapse = "\n" + ) stop(stop_message) - } - + # Reformat returned info as sf bbox_sites_sf <- TADA_MakeSpatial(quiet_bbox_sites$result, crs = 4326) - + # Subset sites to only within shapefile and get IDs clipped_sites_sf <- bbox_sites_sf[aoi_sf, ] - + clipped_site_ids <- clipped_sites_sf$MonitoringLocationIdentifier - + record_count <- bbox_avail %>% dplyr::filter(MonitoringLocationIdentifier %in% clipped_site_ids) %>% dplyr::pull(resultCount) %>% sum() - + # Should we proceed with downloads? If ask == TRUE then ask the user. - if(ask == TRUE){ + if (ask == TRUE) { user_decision <- ask_user(n_records = record_count) - + # Act on input - if(user_decision == "yes") { + if (user_decision == "yes") { print("Proceeding with download.") } else { stop("Cancelled by user.", call. = FALSE) } } - + # Continue now with site count site_count <- length(clipped_site_ids) - + # Check for either more than 300 sites or more records than max_recs. # If either is true then we'll approach the pull as a "big data" pull - if( site_count > 300 | record_count > maxrecs) { + if (site_count > 300 | record_count > maxrecs) { message( paste0( "The number of sites and/or records matched by the AOI and query terms is large, so the download may take some time. 
", "If your AOI is a county, state, country, or HUC boundary it would be more efficient to provide a code instead of an sf object." ) ) - + # Use helper function to download large data volume results.DR <- withCallingHandlers( TADA_BigDataHelper( @@ -509,13 +512,13 @@ TADA_DataRetrieval <- function(startDate = "null", ), message = function(m) message(m$message) ) - - + + rm(bbox_avail, bbox_sites_sf) gc() - + # Check if any results were returned - if ( (nrow(results.DR) > 0 ) == FALSE) { + if ((nrow(results.DR) > 0) == FALSE) { print( paste0( "Returning empty results dataframe: ", @@ -527,12 +530,11 @@ TADA_DataRetrieval <- function(startDate = "null", # Empty TADAprofile.clean <- results.DR } else { - # Get site metadata sites.DR <- clipped_sites_sf %>% dplyr::as_tibble() %>% dplyr::select(-geometry) - + # Get project metadata projects.DR <- suppressMessages( dataRetrieval::readWQPdata( @@ -542,7 +544,7 @@ TADA_DataRetrieval <- function(startDate = "null", service = "Project" ) ) - + # Join results, sites, projects TADAprofile <- TADA_JoinWQPProfiles( FullPhysChem = results.DR, @@ -551,26 +553,25 @@ TADA_DataRetrieval <- function(startDate = "null", ) %>% dplyr::mutate( dplyr::across(tidyselect::everything(), as.character) ) - + # run TADA_AutoClean function if (applyautoclean == TRUE) { print("Data successfully downloaded. Running TADA_AutoClean function.") - + TADAprofile.clean <- TADA_AutoClean(TADAprofile) } else { TADAprofile.clean <- TADAprofile } } - + return(TADAprofile.clean) - + # Doesn't meet "big data" threshold: } else { - # Retrieve all 3 profiles print("Downloading WQP query results. 
This may take some time depending upon the query size.") print(WQPquery) - + # Get results results.DR <- suppressMessages( dataRetrieval::readWQPdata( @@ -580,7 +581,7 @@ TADA_DataRetrieval <- function(startDate = "null", ignore_attributes = TRUE ) ) - + # Check if any results were returned if ((nrow(results.DR) > 0) == FALSE) { paste0( @@ -591,12 +592,11 @@ TADA_DataRetrieval <- function(startDate = "null", ) TADAprofile.clean <- results.DR } else { - # Get site metadata sites.DR <- clipped_sites_sf %>% dplyr::as_tibble() %>% dplyr::select(-geometry) - + # Get project metadata projects.DR <- suppressMessages( dataRetrieval::readWQPdata( @@ -606,7 +606,7 @@ TADA_DataRetrieval <- function(startDate = "null", service = "Project" ) ) - + # Join results, sites, projects TADAprofile <- TADA_JoinWQPProfiles( FullPhysChem = results.DR, @@ -615,27 +615,25 @@ TADA_DataRetrieval <- function(startDate = "null", ) %>% dplyr::mutate( dplyr::across(tidyselect::everything(), as.character) ) - + # Run TADA_AutoClean function if (applyautoclean == TRUE) { print("Data successfully downloaded. Running TADA_AutoClean function.") - + TADAprofile.clean <- TADA_AutoClean(TADAprofile) } else { TADAprofile.clean <- TADAprofile } } - + return(TADAprofile.clean) - } - + # If no sf object provided: - } else { - + } else { # Set query parameters WQPquery <- list() - + if (!"null" %in% statecode) { load(system.file("extdata", "statecodes_df.Rdata", package = "EPATADA")) statecode <- as.character(statecode) @@ -648,13 +646,13 @@ TADA_DataRetrieval <- function(startDate = "null", WQPquery <- c(WQPquery, statecode = list(statecd)) } } - + if (length(huc) > 1) { WQPquery <- c(WQPquery, huc = list(huc)) } else if (huc != "null") { WQPquery <- c(WQPquery, huc = huc) } - + if (length(startDate) > 1) { if (is.na(suppressWarnings(lubridate::parse_date_time(startDate[1], orders = "ymd")))) { stop("Incorrect date format. 
Please use the format YYYY-MM-DD.") @@ -666,67 +664,67 @@ TADA_DataRetrieval <- function(startDate = "null", } WQPquery <- c(WQPquery, startDate = startDate) } - + if (length(countrycode) > 1) { WQPquery <- c(WQPquery, countrycode = list(countrycode)) } else if (countrycode != "null") { WQPquery <- c(WQPquery, countrycode = countrycode) } - + if (length(countycode) > 1) { WQPquery <- c(WQPquery, countycode = list(countycode)) } else if (countycode != "null") { WQPquery <- c(WQPquery, countycode = countycode) } - + if (length(siteid) > 1) { WQPquery <- c(WQPquery, siteid = list(siteid)) } else if (siteid != "null") { WQPquery <- c(WQPquery, siteid = siteid) } - + if (length(siteType) > 1) { WQPquery <- c(WQPquery, siteType = list(siteType)) } else if (siteType != "null") { WQPquery <- c(WQPquery, siteType = siteType) } - + if (length(characteristicName) > 1) { WQPquery <- c(WQPquery, characteristicName = list(characteristicName)) } else if (characteristicName != "null") { WQPquery <- c(WQPquery, characteristicName = characteristicName) } - + if (length(characteristicType) > 1) { WQPquery <- c(WQPquery, characteristicType = list(characteristicType)) } else if (characteristicType != "null") { WQPquery <- c(WQPquery, characteristicType = characteristicType) } - + if (length(sampleMedia) > 1) { WQPquery <- c(WQPquery, sampleMedia = list(sampleMedia)) } else if (sampleMedia != "null") { WQPquery <- c(WQPquery, sampleMedia = sampleMedia) } - + if (length(project) > 1) { WQPquery <- c(WQPquery, project = list(project)) } else if (project != "null") { WQPquery <- c(WQPquery, project = project) } - + if (length(providers) > 1) { WQPquery <- c(WQPquery, providers = list(providers)) } else if (providers != "null") { WQPquery <- c(WQPquery, providers = providers) } - + if (length(organization) > 1) { WQPquery <- c(WQPquery, organization = list(organization)) } else if (organization != "null") { WQPquery <- c(WQPquery, organization = organization) } - + if (length(endDate) > 1) 
{ if (is.na(suppressWarnings(lubridate::parse_date_time(endDate[1], orders = "ymd")))) { stop("Incorrect date format. Please use the format YYYY-MM-DD.") @@ -738,53 +736,52 @@ TADA_DataRetrieval <- function(startDate = "null", } WQPquery <- c(WQPquery, endDate = endDate) } - + # Query info on available data # Don't want to print every message that's returned by WQP quiet_whatWQPdata <- purrr::quietly(dataRetrieval::whatWQPdata) - + quiet_query_avail <- quiet_whatWQPdata(WQPquery) - - if(is.null(quiet_query_avail$result)){ - + + if (is.null(quiet_query_avail$result)) { stop_message <- quiet_query_avail$messages %>% grep(pattern = "failed|HTTP", x = ., ignore.case = FALSE, value = TRUE) %>% paste("\n", ., collapse = "") %>% paste("The WQP request returned a NULL with the following message(s): \n", - ., - collapse = "\n") - + ., + collapse = "\n" + ) + stop(stop_message) - } - + query_avail <- quiet_query_avail$result - + site_count <- length(query_avail$MonitoringLocationIdentifier) - + record_count <- query_avail %>% dplyr::pull(resultCount) %>% sum() - + # Should we proceed with downloads? If ask == TRUE then ask the user. - if(ask == TRUE){ + if (ask == TRUE) { user_decision <- ask_user(n_records = record_count) - + # Act on input - if(user_decision == "yes") { + if (user_decision == "yes") { print("Proceeding with download.") } else { stop("Cancelled by user.", call. = FALSE) } } - + # Check for either more than 300 sites or more records than max_recs. # If either is true then we'll approach the pull as a "big data" pull - if(site_count > 300 | record_count > maxrecs) { + if (site_count > 300 | record_count > maxrecs) { message( "The number of sites and/or records matched by the query terms is large, so the download may take some time." 
) - + # Use helper function to download large data volume results.DR <- suppressMessages( TADA_BigDataHelper( @@ -795,17 +792,17 @@ TADA_DataRetrieval <- function(startDate = "null", maxsites = 300 ) ) - + rm(query_avail) gc() - + # Get site metadata sites.DR <- suppressMessages( dataRetrieval::whatWQPsites( siteid = unique(results.DR$MonitoringLocationIdentifier) ) ) - + # Get project metadata projects.DR <- suppressMessages( dataRetrieval::readWQPdata( @@ -815,7 +812,7 @@ TADA_DataRetrieval <- function(startDate = "null", service = "Project" ) ) - + # Join results, sites, projects TADAprofile <- TADA_JoinWQPProfiles( FullPhysChem = results.DR, @@ -824,18 +821,18 @@ TADA_DataRetrieval <- function(startDate = "null", ) %>% dplyr::mutate( dplyr::across(tidyselect::everything(), as.character) ) - + # run TADA_AutoClean function if (applyautoclean == TRUE) { print("Data successfully downloaded. Running TADA_AutoClean function.") - + TADAprofile.clean <- TADA_AutoClean(TADAprofile) } else { TADAprofile.clean <- TADAprofile } - + return(TADAprofile.clean) - + # If not a "big data" pull: } else { # Retrieve all 3 profiles @@ -843,25 +840,25 @@ TADA_DataRetrieval <- function(startDate = "null", print(WQPquery) results.DR <- suppressMessages( dataRetrieval::readWQPdata(WQPquery, - dataProfile = "resultPhysChem", - ignore_attributes = TRUE + dataProfile = "resultPhysChem", + ignore_attributes = TRUE ) ) - + # check if any results are available if ((nrow(results.DR) > 0) == FALSE) { print("Returning empty results dataframe: Your WQP query returned no results (no data available). Try a different query. 
Removing some of your query filters OR broadening your search area may help.") TADAprofile.clean <- results.DR } else { sites.DR <- suppressMessages(dataRetrieval::whatWQPsites(WQPquery)) - + projects.DR <- suppressMessages( dataRetrieval::readWQPdata(WQPquery, - ignore_attributes = TRUE, - service = "Project" + ignore_attributes = TRUE, + service = "Project" ) ) - + TADAprofile <- TADA_JoinWQPProfiles( FullPhysChem = results.DR, Sites = sites.DR, @@ -869,62 +866,61 @@ TADA_DataRetrieval <- function(startDate = "null", ) %>% dplyr::mutate( dplyr::across(tidyselect::everything(), as.character) ) - + # run TADA_AutoClean function if (applyautoclean == TRUE) { print("Data successfully downloaded. Running TADA_AutoClean function.") - + TADAprofile.clean <- TADA_AutoClean(TADAprofile) } else { TADAprofile.clean <- TADAprofile } } - + return(TADAprofile.clean) } } } #' Access options available for querying tribal spatial data with `TADA_DataRetrieval()`. -#' +#' #' @description #' This function provides access to [six layer datasets](https://geopub.epa.gov/arcgis/rest/services/EMEF/Tribal/MapServer) #' containing spatial data related to tribal lands: "Alaska Native Allotments", #' "Alaska Native Villages", "American Indian Reservations", "Off-reservation Trust Lands", #' "Oklahoma Tribal Statistical Areas", and "Virginia Federally Recognized Tribes". #' These datasets are used by `TADA_DataRetrieval()` when retrieving spatial data -#' for tribal lands specified by the user. -#' +#' for tribal lands specified by the user. +#' #' The purpose of `TADA_TribalOptions()` is to allow the user to review the available #' data in those datasets and identify the records they would like to query with #' `TADA_DataRetrieval()`. 
-#' +#' #' An interactive map of the six layer datasets is available on ArcGIS Online Map #' Viewer here: https://www.arcgis.com/apps/mapviewer/index.html?url=https://geopub.epa.gov/arcgis/rest/services/EMEF/Tribal/MapServer&source=sd -#' +#' #' @param tribal_area_type A character string. Must be one of the six tribal -#' spatial layers: "Alaska Native Allotments", "Alaska Native Villages", +#' spatial layers: "Alaska Native Allotments", "Alaska Native Villages", #' "American Indian Reservations", "Off-reservation Trust Lands", #' "Oklahoma Tribal Statistical Areas", or "Virginia Federally Recognized Tribes". -#' +#' #' @param return_sf Logical. Should the function return the dataset as an `sf` #' object (TRUE) or a data frame (FALSE)? Defaults to FALSE. -#' +#' #' @returns A data frame or `sf` object containing the specified layer from the EPA #' Map Service. -#' +#' #' @note #' Alaska Native Villages and Virginia Federally Recognized Tribes are point #' geometries in the Map Service, not polygons. At the time of this writing they #' do not return any data when used for WQP bbox queries. 
-#' +#' #' @export -#' +#' #' @seealso [TADA_DataRetrieval()] -#' +#' -TADA_TribalOptions <- function(tribal_area_type, return_sf = FALSE){ - +TADA_TribalOptions <- function(tribal_area_type, return_sf = FALSE) { # Make a reference table for tribal area type + url matching map_service_urls <- tibble::tribble( ~tribal_area, ~url, @@ -935,22 +931,24 @@ TADA_TribalOptions <- function(tribal_area_type, return_sf = FALSE){ "Oklahoma Tribal Statistical Areas", "https://geopub.epa.gov/arcgis/rest/services/EMEF/Tribal/MapServer/4", "Virginia Federally Recognized Tribes", "https://geopub.epa.gov/arcgis/rest/services/EMEF/Tribal/MapServer/5" ) - + # Confirm usable string provided - if( !(tribal_area_type %in% map_service_urls$tribal_area) ){ + if (!(tribal_area_type %in% map_service_urls$tribal_area)) { stop("tribal_area_type must match one of the six tribal spatial layer names.") } - + # Query Map Service - tribal_area_sf <- dplyr::filter(map_service_urls, - tribal_area == tribal_area_type)$url %>% + tribal_area_sf <- dplyr::filter( + map_service_urls, + tribal_area == tribal_area_type + )$url %>% arcgislayers::arc_open() %>% # Return sf arcgislayers::arc_select() %>% sf::st_make_valid() - + # Convert to df if needed, export - if(return_sf == FALSE){ + if (return_sf == FALSE) { return( as.data.frame(tribal_area_sf) %>% sf::st_drop_geometry() @@ -958,7 +956,6 @@ TADA_TribalOptions <- function(tribal_area_type, return_sf = FALSE){ } else { return(tribal_area_sf) } - } #' Read in WQP data using the Water Quality Portal (WQP) web services @@ -1015,25 +1012,25 @@ TADA_ReadWQPWebServices <- function(webservice) { # read in csv from WQP web service if (grepl("zip=yes", webservice)) { webservice <- stringr::str_replace(webservice, "zip=yes", "zip=no") - + # download data webservice <- data.table::fread(toString(webservice)) - + # if input df was not downloaded using USGS's dataRetrieval, then the # column names will include / separators instead of . and TADA uses . # (e.g. 
ResultMeasure/MeasureUnitCode vs. ResultMeasure.MeasureUnitCode) colnames(webservice) <- gsub("/", ".", colnames(webservice)) - + return(webservice) } else { # download data webservice <- data.table::fread(toString(webservice)) - + # if input df was not downloaded using USGS's dataRetrieval, then the # column names will include / separators instead of . and TADA uses . # (e.g. ResultMeasure/MeasureUnitCode vs. ResultMeasure.MeasureUnitCode) colnames(webservice) <- gsub("/", ".", colnames(webservice)) - + return(webservice) } } @@ -1053,23 +1050,22 @@ TADA_ReadWQPWebServices <- function(webservice) { #' @param maxsites Maximum number of sites to query at once. #' #' @return TADA-compatible dataframe -TADA_BigDataHelper <- function(record_summary, WQPquery, maxrecs = 250000, maxsites = 300){ - +TADA_BigDataHelper <- function(record_summary, WQPquery, maxrecs = 250000, maxsites = 300) { # Get total number of results per site and separate out sites with >maxrecs results tot_sites <- record_summary %>% dplyr::group_by(MonitoringLocationIdentifier) %>% dplyr::summarise(tot_n = sum(resultCount)) %>% dplyr::filter(tot_n > 0) %>% dplyr::arrange(tot_n) - + # Sites with less than/equal to maxrecs smallsites <- tot_sites %>% dplyr::filter(tot_n <= maxrecs) # Sites with more than maxrecs bigsites <- tot_sites %>% dplyr::filter(tot_n > maxrecs) - + df_small <- data.frame() df_big <- data.frame() - + # Work with small sites first: # Build download groups. Total record count limited to value of maxrecs. # Number of sites per download group limited to 300. @@ -1080,20 +1076,24 @@ TADA_BigDataHelper <- function(record_summary, WQPquery, maxrecs = 250000, maxsi threshold = maxrecs, maxgroupsize = 300 )) - + # Status update to user print( - paste0("Downloading data from sites with fewer than ", - maxrecs, - " results by grouping them together.") + paste0( + "Downloading data from sites with fewer than ", + maxrecs, + " results by grouping them together." 
+ ) ) - + small_prog_bar <- txtProgressBar(min = 0, max = sum(smallsites$tot_n), style = 3) - + # Download the data for each group for (i in 1:max(smallsitesgrp$group)) { - small_site_chunk <- subset(smallsitesgrp$MonitoringLocationIdentifier, - smallsitesgrp$group == i) + small_site_chunk <- subset( + smallsitesgrp$MonitoringLocationIdentifier, + smallsitesgrp$group == i + ) # Query result data results_small <- suppressMessages( dataRetrieval::readWQPdata( @@ -1104,35 +1104,37 @@ TADA_BigDataHelper <- function(record_summary, WQPquery, maxrecs = 250000, maxsi ) ) %>% dplyr::mutate(dplyr::across(everything(), as.character)) - + # If data is returned, stack with what's already been retrieved if (dim(results_small)[1] > 0) { df_small <- dplyr::bind_rows(df_small, results_small) } - + # Update progress setTxtProgressBar(pb = small_prog_bar, value = nrow(df_small)) } # Close progress bar when complete close(small_prog_bar) - + rm(smallsites, smallsitesgrp) gc() } - + # Large sites (>= maxrecs) next: if (dim(bigsites)[1] > 0) { print( - paste0("Downloading data from sites with greater than ", - maxrecs, - " results, chunking queries by site.") + paste0( + "Downloading data from sites with greater than ", + maxrecs, + " results, chunking queries by site." 
+ ) ) - + big_prog_bar <- txtProgressBar(min = 0, max = sum(bigsites$tot_n), style = 3) - + # Unique site IDs bsitesvec <- unique(bigsites$MonitoringLocationIdentifier) - + # For each site for (i in 1:length(bsitesvec)) { # Download each site's data individually @@ -1143,9 +1145,9 @@ TADA_BigDataHelper <- function(record_summary, WQPquery, maxrecs = 250000, maxsi dataProfile = "resultPhysChem", ignore_attributes = TRUE ) - )%>% + ) %>% dplyr::mutate(dplyr::across(everything(), as.character)) - + if (dim(results_big)[1] > 0) { df_big <- dplyr::bind_rows(df_big, results_big) } @@ -1154,14 +1156,14 @@ TADA_BigDataHelper <- function(record_summary, WQPquery, maxrecs = 250000, maxsi } # Close progress bar when complete close(big_prog_bar) - + rm(bigsites) gc() } - - + + df_out <- dplyr::bind_rows(df_small, df_big) - + return(df_out) } @@ -1248,11 +1250,11 @@ TADA_BigDataRetrieval <- function(startDate = "null", maxrecs = 250000, applyautoclean = FALSE) { start_T <- Sys.time() - + if (!"null" %in% statecode & !"null" %in% huc) { stop("Please provide either state code(s) OR huc(s) to proceed.") } - + if (!startDate == "null") { startDat <- lubridate::ymd(startDate) startYearLo <- lubridate::year(startDat) @@ -1261,7 +1263,7 @@ TADA_BigDataRetrieval <- function(startDate = "null", startDat <- lubridate::ymd(startDate) startYearLo <- lubridate::year(startDat) } - + # Logic: if the input endDate is not null, convert to date and obtain year # for summary if (!endDate == "null") { @@ -1272,7 +1274,7 @@ TADA_BigDataRetrieval <- function(startDate = "null", endDat <- lubridate::ymd(endDate) endYearHi <- lubridate::year(endDat) } - + # Create readWQPsummary query WQPquery <- list() if (length(characteristicName) > 1) { @@ -1290,7 +1292,7 @@ TADA_BigDataRetrieval <- function(startDate = "null", } else if (siteType != "null") { WQPquery <- c(WQPquery, siteType = siteType) } - + if (!"null" %in% statecode) { load(system.file("extdata", "statecodes_df.Rdata", package = "EPATADA")) 
statecode <- as.character(statecode) @@ -1308,45 +1310,45 @@ TADA_BigDataRetrieval <- function(startDate = "null", WQPquery <- c(WQPquery, statecode = statecd) } } - + if (length(huc) > 1) { WQPquery <- c(WQPquery, huc = list(huc)) } else if (huc != "null") { WQPquery <- c(WQPquery, huc = huc) } - + if (length(countrycode) > 1) { WQPquery <- c(WQPquery, countrycode = list(countrycode)) } else if (countrycode != "null") { WQPquery <- c(WQPquery, countrycode = countrycode) } - + if (length(countycode) > 1) { WQPquery <- c(WQPquery, countycode = list(countycode)) } else if (countycode != "null") { WQPquery <- c(WQPquery, countycode = countycode) } - + if (length(organization) > 1) { WQPquery <- c(WQPquery, organization = list(organization)) } else if (organization != "null") { WQPquery <- c(WQPquery, organization = organization) } - + # cut down on summary query time if possible based on big data query diffdat <- lubridate::time_length(difftime(Sys.Date(), startDat), "years") - + if (diffdat <= 1) { WQPquery <- c(WQPquery, summaryYears = 1) } - + if (diffdat > 1 & diffdat <= 5) { WQPquery <- c(WQPquery, summaryYears = 5) } - + print("Building site summary table for chunking result downloads...") df_summary <- dataRetrieval::readWQPsummary(WQPquery) - + ## NOTE: if query brings back no results, function returns empty # dataRetrieval profile, not empty summary if (nrow(df_summary) > 0) { @@ -1356,13 +1358,10 @@ TADA_BigDataRetrieval <- function(startDate = "null", YearSummarized >= startYearLo, YearSummarized <= endYearHi ) - + rm(df_summary) # if there are still site records when filtered to years of interest.... 
if (dim(sites)[1] > 0) { - - - # get total number of results per site and separate out sites with >250000 results tot_sites <- sites %>% dplyr::group_by(MonitoringLocationIdentifier) %>% @@ -1370,14 +1369,14 @@ TADA_BigDataRetrieval <- function(startDate = "null", dplyr::arrange(tot_n) smallsites <- tot_sites %>% dplyr::filter(tot_n <= maxrecs) bigsites <- tot_sites %>% dplyr::filter(tot_n > maxrecs) - + df <- data.frame() - + if (dim(smallsites)[1] > 0) { smallsitesgrp <- make_groups(smallsites, maxrecs) - + print(paste0("Downloading data from sites with fewer than ", maxrecs, " results by grouping them together.")) - + for (i in 1:max(smallsitesgrp$group)) { site_chunk <- subset(smallsitesgrp$MonitoringLocationIdentifier, smallsitesgrp$group == i) joins <- TADA_DataRetrieval( @@ -1393,29 +1392,29 @@ TADA_BigDataRetrieval <- function(startDate = "null", df <- dplyr::bind_rows(df, joins) } } - + rm(smallsites, smallsitesgrp) } - + if (dim(bigsites)[1] > 0) { print(paste0("Downloading data from sites with greater than ", maxrecs, " results, chunking queries by shorter time intervals...")) - + bsitesvec <- unique(bigsites$MonitoringLocationIdentifier) - + for (i in 1:length(bsitesvec)) { mlidsum <- subset(sites, sites$MonitoringLocationIdentifier == bsitesvec[i]) mlidsum <- mlidsum %>% dplyr::group_by(MonitoringLocationIdentifier, YearSummarized) %>% dplyr::summarise(tot_n = sum(ResultCount)) site_chunk <- unique(mlidsum$MonitoringLocationIdentifier) - + bigsitegrps <- make_groups(mlidsum, maxrecs) - + for (i in 1:max(bigsitegrps$group)) { yearchunk <- subset(bigsitegrps$YearSummarized, bigsitegrps$group == i) startD <- paste0(min(yearchunk), "-01-01") endD <- paste0(max(yearchunk), "-12-31") - + joins <- TADA_DataRetrieval( startDate = startD, endDate = endD, @@ -1425,7 +1424,7 @@ TADA_BigDataRetrieval <- function(startDate = "null", sampleMedia = sampleMedia, applyautoclean = FALSE ) - + if (dim(joins)[1] > 0) { df <- dplyr::bind_rows(df, joins) } @@ -1441,18 
+1440,18 @@ TADA_BigDataRetrieval <- function(startDate = "null", warning("Query returned no data. Function returns an empty dataframe.") return(df_summary) } - + df <- subset(df, as.Date(df$ActivityStartDate, "%Y-%m-%d") >= startDat & as.Date(df$ActivityStartDate, "%Y-%m-%d") <= endDat) - + if (applyautoclean == TRUE) { print("Applying TADA_AutoClean function...") df <- TADA_AutoClean(df) } - + # timing function for efficiency tests. difference <- difftime(Sys.time(), start_T, units = "mins") print(difference) - + return(df) } @@ -1489,20 +1488,20 @@ TADA_JoinWQPProfiles <- function(FullPhysChem = "null", Sites = "null", Projects = "null") { FullPhysChem.df <- FullPhysChem - + Sites.df <- Sites - + Projects.df <- Projects - + # Join station data to full phys/chem (FullPhysChem.df) if (length(Sites.df > 1)) { if (nrow(Sites.df) > 0) { join1 <- FullPhysChem.df %>% # join stations to results dplyr::left_join(Sites.df, - by = "MonitoringLocationIdentifier", - multiple = "all", - relationship = "many-to-many" + by = "MonitoringLocationIdentifier", + multiple = "all", + relationship = "many-to-many" ) %>% # remove ".x" suffix from column names dplyr::rename_at(dplyr::vars(dplyr::ends_with(".x")), ~ stringr::str_replace(., "\\..$", "")) %>% @@ -1514,8 +1513,8 @@ TADA_JoinWQPProfiles <- function(FullPhysChem = "null", } else { join1 <- FullPhysChem.df } - - + + # Add QAPP columns from project if (length(Projects.df) > 1) { if (nrow(Projects.df) > 0) { @@ -1541,20 +1540,19 @@ TADA_JoinWQPProfiles <- function(FullPhysChem = "null", } else { join2 <- join1 } - + return(join2) } #' Ask user to approve WQP downloads -#' +#' #' Once record counts have been retrieved from the Water Quality Portal (WQP) for #' a query, this function is used to prompt the user to decide (i.e., "yes"/"no") #' whether the download should proceed. The user is also reminded of the limits of #' Microsoft Excel for row counts as a comparison. 
-#' +#' #' @param n_records A numeric value indicating the number of records that will be downloaded from the WQP if the user decides to proceed. -ask_user <- function(n_records){ - +ask_user <- function(n_records) { # Text to show user user_prompt <- cat( "Your WQP query will return ", @@ -1563,9 +1561,9 @@ ask_user <- function(n_records){ "Would you like to continue with the download? [yes/no] ", sep = "" ) - + # Ask user if they want to continue & check for valid response - while(TRUE){ + while (TRUE) { user_input <- readline(prompt = user_prompt) # Convert response to lower and no whitespace user_input <- tolower(trimws(user_input)) @@ -1575,7 +1573,6 @@ ask_user <- function(n_records){ cat("Invalid input. Please enter 'yes' or 'no'.\n") } } - } @@ -1601,9 +1598,9 @@ make_groups <- function(x, maxrecs) { i <- i + 1 groupings <- plyr::rbind.fill(groupings, group) } - + x$group <- i - + groupings <- plyr::rbind.fill(groupings, x) } return(groupings) diff --git a/R/Maintenance.R b/R/Maintenance.R index 428f0a5f..b5f047fa 100644 --- a/R/Maintenance.R +++ b/R/Maintenance.R @@ -304,7 +304,7 @@ TADA_UpdateExampleData <- function() { # Type == "CharacteristicUnit", # Status == "Accepted" # ) -# +# # # find Characteristic/Source/Value.Unit combinations with more than one row # find.dups <- unit.ref %>% # dplyr::filter(Type == "CharacteristicUnit") %>% @@ -313,14 +313,12 @@ TADA_UpdateExampleData <- function() { # Max_n = length(unique(Maximum))) %>% # dplyr::filter(Min_n > 1 | # Max_n > 1) -# +# # # create download path # download.path <- file.path(Sys.getenv("USERPROFILE"), "Downloads", "WQXcharValRef_multiples.csv") -# +# # # create csv to send to WQX team and save in test results folder # readr::write_csv(find.dups, download.path) # # # review csv and send to WQX team to update the validation table # - - diff --git a/tests/testthat/test-DataDiscoveryRetrieval.R b/tests/testthat/test-DataDiscoveryRetrieval.R index bd96696a..8c2cb9f2 100644 --- 
a/tests/testthat/test-DataDiscoveryRetrieval.R +++ b/tests/testthat/test-DataDiscoveryRetrieval.R @@ -418,7 +418,7 @@ test_that("TADA_DataRetrieval", { # test_that("Reg&BigdataRetrieval", { # # big <- TADA_BigDataRetrieval(characteristicName = "Algae, substrate rock/bank cover (choice list)", sampleMedia = "Water", siteType = "Stream", startDate = "2020-01-01", applyautoclean = FALSE) # reg <- TADA_DataRetrieval(characteristicName = "Algae, substrate rock/bank cover (choice list)", sampleMedia = "Water", siteType = "Stream", startDate = "2020-01-01", applyautoclean = FALSE, ask = FALSE) -# +# # expect_equal(nrow(big), nrow(reg)) # }) @@ -428,7 +428,7 @@ test_that("TADA_DataRetrieval", { # endDate <- "2021-09-30" # # big <- TADA_BigDataRetrieval(startDate = startDate, endDate = endDate, huc = c("04030202", "04030201"), characteristicName = "Escherichia coli", siteType = "Stream") # logic <- min(big$ActivityStartDate) >= as.Date(startDate, format = "%Y-%m-%d") & max(big$ActivityStartDate) <= as.Date(endDate, format = "%Y-%m-%d") -# +# # expect_true(logic) # }) diff --git a/tests/testthat/test-ResultFlagsIndependent.R b/tests/testthat/test-ResultFlagsIndependent.R index 9cad1019..4782e61c 100644 --- a/tests/testthat/test-ResultFlagsIndependent.R +++ b/tests/testthat/test-ResultFlagsIndependent.R @@ -123,36 +123,36 @@ test_that("TADA_FindPotentialDuplicatsMultipleOrgs has non-NA values for each ro }) test_that("WQXcharValRef.csv contains only one row for each unique characteristic/source/unit combination for threshold functions", { - unit.ref <- utils::read.csv(system.file("extdata", "WQXcharValRef.csv", package = "EPATADA")) %>% - dplyr::filter( - Type == "CharacteristicUnit", - Status == "Accepted" - ) - + unit.ref <- utils::read.csv(system.file("extdata", "WQXcharValRef.csv", package = "EPATADA")) %>% + dplyr::filter( + Type == "CharacteristicUnit", + Status == "Accepted" + ) + find.dups <- unit.ref %>% dplyr::filter(Type == "CharacteristicUnit") %>% dplyr::group_by(Characteristic, Source, Value.Unit) %>% - dplyr::mutate(Min_n = length(unique(Minimum)), - Max_n =
length(unique(Maximum))) %>% + dplyr::mutate( + Min_n = length(unique(Minimum)), + Max_n = length(unique(Maximum)) + ) %>% dplyr::filter(Min_n > 1 | - Max_n > 1) - + Max_n > 1) + expect_true(nrow(find.dups) == 0) - }) +}) test_that("range flag functions work", { # use random data upper <- TADA_RandomTestingData() - + expect_no_error(TADA_FlagAboveThreshold(upper)) - expect_no_warning(TADA_FlagAboveThreshold(upper)) + expect_no_warning(TADA_FlagAboveThreshold(upper)) expect_no_message(TADA_FlagAboveThreshold(upper)) expect_no_condition(TADA_FlagAboveThreshold(upper)) - + expect_no_error(TADA_FlagBelowThreshold(upper)) - expect_no_warning(TADA_FlagBelowThreshold(upper)) + expect_no_warning(TADA_FlagBelowThreshold(upper)) expect_no_message(TADA_FlagBelowThreshold(upper)) expect_no_condition(TADA_FlagBelowThreshold(upper)) - }) From 8ab88578692e2ccbd7561a0759174d383779cdf5 Mon Sep 17 00:00:00 2001 From: Matt Brousil <37380883+mbrousil@users.noreply.github.com> Date: Thu, 23 Jan 2025 08:40:03 -0800 Subject: [PATCH 22/35] Example housekeeping --- R/CriteriaComparison.R | 3 +- R/DataDiscoveryRetrieval.R | 29 ++++++++++++------- R/GeospatialFunctions.R | 15 ++++++---- R/ResultFlagsIndependent.R | 4 +-- R/Utilities.R | 6 ++-- man/TADA_DataRetrieval.Rd | 29 ++++++++++++------- ...ADA_FindPotentialDuplicatesMultipleOrgs.Rd | 2 +- man/TADA_FlagContinuousData.Rd | 2 +- man/TADA_GetATTAINS.Rd | 3 +- man/TADA_MakeSpatial.Rd | 3 +- man/TADA_PairForCriteriaCalc.Rd | 3 +- man/TADA_SubstituteDeprecatedChars.Rd | 6 ++-- man/TADA_ViewATTAINS.Rd | 3 +- man/fetchATTAINS.Rd | 3 +- man/fetchNHD.Rd | 3 +- 15 files changed, 70 insertions(+), 44 deletions(-) diff --git a/R/CriteriaComparison.R b/R/CriteriaComparison.R index 82634975..792dbf30 100644 --- a/R/CriteriaComparison.R +++ b/R/CriteriaComparison.R @@ -207,7 +207,8 @@ TADA_CreatePairRef <- function(.data, ph = TRUE, hardness = TRUE, temp = TRUE, #' AL_df <- TADA_DataRetrieval( #' startDate = "2010-11-30", #' endDate = "2010-12-01", 
-#' statecode = "AL" +#' statecode = "AL", +#' ask = FALSE #' ) #' #' AL_PairRef <- TADA_PairForCriteriaCalc(AL_df) diff --git a/R/DataDiscoveryRetrieval.R b/R/DataDiscoveryRetrieval.R index 67606762..6688cd8c 100644 --- a/R/DataDiscoveryRetrieval.R +++ b/R/DataDiscoveryRetrieval.R @@ -73,24 +73,26 @@ #' # example for WI #' tada1 <- TADA_DataRetrieval( #' statecode = "WI", countycode = "Dane", -#' characteristicName = "Phosphorus" +#' characteristicName = "Phosphorus", +#' ask = FALSE #' ) #' #' # example for UT #' tada2 <- TADA_DataRetrieval( #' statecode = "UT", -#' characteristicName = c("Ammonia", "Nitrate", "Nitrogen") +#' characteristicName = c("Ammonia", "Nitrate", "Nitrogen"), +#' ask = FALSE #' ) #' #' # example for SC -#' tada3 <- TADA_DataRetrieval(statecode = "SC", countycode = "Abbeville") +#' tada3 <- TADA_DataRetrieval(statecode = "SC", countycode = "Abbeville", ask = FALSE) #' #' # example for CT -#' tada4 <- TADA_DataRetrieval(statecode = "CT", startDate = "2020-10-01") +#' tada4 <- TADA_DataRetrieval(statecode = "CT", startDate = "2020-10-01", ask = FALSE) #' #' #' # note that countycode queries require a statecode (see example below) -#' tada5 <- TADA_DataRetrieval(countycode = "US:02:020") +#' tada5 <- TADA_DataRetrieval(countycode = "US:02:020", ask = FALSE) #' #' # example for NM #' tada6 <- TADA_DataRetrieval( @@ -100,11 +102,12 @@ #' "Nitrate", #' "Nitrogen" #' ), -#' startDate = "2020-05-01" +#' startDate = "2020-05-01", +#' ask = FALSE #' ) #' #' # example for AK project -#' tada7 <- TADA_DataRetrieval(project = "Anchorage Bacteria 20-21") +#' tada7 <- TADA_DataRetrieval(project = "Anchorage Bacteria 20-21", ask = FALSE) #' #' # another example for AK #' tada8 <- TADA_DataRetrieval( @@ -117,7 +120,8 @@ #' "Nitrate", #' "Nitrogen" #' ), -#' startDate = "2018-05-01" +#' startDate = "2018-05-01", +#' ask = FALSE #' ) #' #' # example for tribes @@ -160,7 +164,8 @@ #' "CHOCNATWQX", #' "WNENVDPT_WQX", #' "PUEBLO_POJOAQUE" -#' )) +#' ), +#' ask = 
FALSE) #' #' # query only NWIS data for a 10 year period in CT #' tada10 <- TADA_DataRetrieval( @@ -169,13 +174,15 @@ #' sampleMedia = c("Water", "water"), #' statecode = "CT", # consider downloading only 1 state at a time #' providers = "NWIS", -#' applyautoclean = FALSE +#' applyautoclean = FALSE, +#' ask = FALSE #' ) #' #' # query by country code (e.g. Canada, countrycode = "CA") #' tada11 <- TADA_DataRetrieval( #' startDate = "2015-01-01", -#' countrycode = "CA" +#' countrycode = "CA", +#' ask = FALSE #' ) #' } #' diff --git a/R/GeospatialFunctions.R b/R/GeospatialFunctions.R index 85f9a9d2..79cd11e9 100644 --- a/R/GeospatialFunctions.R +++ b/R/GeospatialFunctions.R @@ -20,7 +20,8 @@ #' characteristicName = "pH", #' statecode = "SC", #' countycode = "Abbeville", -#' applyautoclean = TRUE +#' applyautoclean = TRUE, +#' ask = FALSE #' ) #' #' # make `tada_not_spatial` an sf object, projected in crs = 4269 (NAD83) @@ -127,7 +128,8 @@ TADA_MakeSpatial <- function(.data, crs = 4326) { #' endDate = "1995-12-31", #' characteristicName = "pH", #' statecode = "NV", -#' applyautoclean = TRUE +#' applyautoclean = TRUE, +#' ask = FALSE #' ) #' #' nv_attains_features <- fetchATTAINS(tada_data, catchments_only = FALSE) @@ -476,7 +478,8 @@ fetchATTAINS <- function(.data, catchments_only = FALSE) { #' endDate = "1990-01-15", #' characteristicName = "pH", #' statecode = "CO", -#' applyautoclean = TRUE +#' applyautoclean = TRUE, +#' ask = FALSE #' ) #' #' nhd_data <- fetchNHD(.data = tada_data, resolution = "Hi", features = c("catchments", "waterbodies", "flowlines")) @@ -949,7 +952,8 @@ fetchNHD <- function(.data, resolution = "Hi", features = "catchments") { #' endDate = "2018-07-31", #' characteristicName = "pH", #' statecode = "IL", -#' applyautoclean = TRUE +#' applyautoclean = TRUE, +#' ask = FALSE #' ) #' #' # note: these example ATTAINS data retrieval queries below may take a long time (10+ minutes) to run @@ -1373,7 +1377,8 @@ TADA_GetATTAINS <- function(.data, 
fill_catchments = FALSE, resolution = "Hi", r #' endDate = "1995-12-31", #' characteristicName = "pH", #' statecode = "NV", -#' applyautoclean = TRUE +#' applyautoclean = TRUE, +#' ask = FALSE #' ) #' #' attains_data <- TADA_GetATTAINS(tada_data, return_sf = TRUE) diff --git a/R/ResultFlagsIndependent.R b/R/ResultFlagsIndependent.R index 04c9f2c4..61c00580 100644 --- a/R/ResultFlagsIndependent.R +++ b/R/ResultFlagsIndependent.R @@ -171,7 +171,7 @@ TADA_FlagMethod <- function(.data, clean = TRUE, flaggedonly = FALSE) { #' #' @examples #' \dontrun{ -#' all_data <- TADA_DataRetrieval(project = c("Continuous LC1", "MA_Continuous", "Anchorage Bacteria 20-21")) +#' all_data <- TADA_DataRetrieval(project = c("Continuous LC1", "MA_Continuous", "Anchorage Bacteria 20-21"), ask = FALSE) #' #' # Flag continuous data in new column titled "TADA.ContinuousData.Flag" #' all_data_flags <- TADA_FlagContinuousData(all_data, clean = FALSE) @@ -1184,7 +1184,7 @@ TADA_FlagCoordinates <- function(.data, #' @examples #' \dontrun{ #' # Load dataset -#' dat <- TADA_DataRetrieval(startDate = "2022-09-01", endDate = "2023-05-01", statecode = "PA", sampleMedia = "Water") +#' dat <- TADA_DataRetrieval(startDate = "2022-09-01", endDate = "2023-05-01", statecode = "PA", sampleMedia = "Water", ask = FALSE) #' unique(dat$OrganizationIdentifier) #' # If duplicates across organizations exist, pick the result belonging to "21PA_WQX" if available. #' dat1 <- TADA_FindPotentialDuplicatesMultipleOrgs(dat, dist_buffer = 100, org_hierarchy = c("21PA_WQX")) diff --git a/R/Utilities.R b/R/Utilities.R index efa91f3b..45b139e1 100644 --- a/R/Utilities.R +++ b/R/Utilities.R @@ -627,7 +627,7 @@ TADA_ConvertSpecialChars <- function(.data, col, percent.ave = TRUE) { #' This function uses the WQX Characteristic domain table to substitute #' deprecated (i.e. retired and/or suspect) Characteristic Names with the new #' name in the TADA.CharacteristicName column. 
TADA_SubstituteDeprecatedChars is -#' run within TADA_AutoClean, which runs within TADA_DataRetreival and (if autoclean = TRUE) +#' run within TADA_AutoClean, which runs within TADA_DataRetrieval and (if autoclean = TRUE) #' in TADA_BigDataRetrieval. Therefore, deprecated characteristic names are #' harmonized to the new name automatically upon data retrieval. #' TADA_SubstituteDeprecatedChars can also be used by itself on a user supplied @@ -648,7 +648,7 @@ TADA_ConvertSpecialChars <- function(.data, col, percent.ave = TRUE) { #' @examples #' \dontrun{ #' # download nutrient data in MT from 2022 and set autoclean = FALSE -#' df <- TADA_DataRetrieval(startDate = "2022-01-01", endDate = "2022-12-31", characteristicType = "Nutrient", statecode = "MT", applyautoclean = FALSE) +#' df <- TADA_DataRetrieval(startDate = "2022-01-01", endDate = "2022-12-31", characteristicType = "Nutrient", statecode = "MT", applyautoclean = FALSE, ask = FALSE) #' df2 <- TADA_SubstituteDeprecatedChars(df) #' # in this example, "Inorganic nitrogen (nitrate and nitrite)" is a USGS NWIS characteristic that is #' # deprecated and "Phosphate-phosphorus***retired***use Total Phosphorus, mixed forms" is a deprecated WQX @@ -657,7 +657,7 @@ TADA_ConvertSpecialChars <- function(.data, col, percent.ave = TRUE) { #' unique(df2$CharacteristicName) #' unique(df2$TADA.CharacteristicName) #' -#' df3 <- TADA_DataRetrieval(startDate = "2022-01-01", endDate = "2022-12-31", characteristicType = "Nutrient", statecode = "WY", applyautoclean = FALSE) +#' df3 <- TADA_DataRetrieval(startDate = "2022-01-01", endDate = "2022-12-31", characteristicType = "Nutrient", statecode = "WY", applyautoclean = FALSE, ask = FALSE) #' df4 <- TADA_SubstituteDeprecatedChars(df3) #' unique(df4$CharacteristicName) #' unique(df4$TADA.CharacteristicName) diff --git a/man/TADA_DataRetrieval.Rd b/man/TADA_DataRetrieval.Rd index bcc02ad1..9701cf0c 100644 --- a/man/TADA_DataRetrieval.Rd +++ b/man/TADA_DataRetrieval.Rd @@ -121,24 +121,26 
@@ errors when used with this function. # example for WI tada1 <- TADA_DataRetrieval( statecode = "WI", countycode = "Dane", - characteristicName = "Phosphorus" + characteristicName = "Phosphorus", + ask = FALSE ) # example for UT tada2 <- TADA_DataRetrieval( statecode = "UT", - characteristicName = c("Ammonia", "Nitrate", "Nitrogen") + characteristicName = c("Ammonia", "Nitrate", "Nitrogen"), + ask = FALSE ) # example for SC -tada3 <- TADA_DataRetrieval(statecode = "SC", countycode = "Abbeville") +tada3 <- TADA_DataRetrieval(statecode = "SC", countycode = "Abbeville", ask = FALSE) # example for CT -tada4 <- TADA_DataRetrieval(statecode = "CT", startDate = "2020-10-01") +tada4 <- TADA_DataRetrieval(statecode = "CT", startDate = "2020-10-01", ask = FALSE) # note that countycode queries require a statecode (see example below) -tada5 <- TADA_DataRetrieval(countycode = "US:02:020") +tada5 <- TADA_DataRetrieval(countycode = "US:02:020", ask = FALSE) # example for NM tada6 <- TADA_DataRetrieval( @@ -148,11 +150,12 @@ tada6 <- TADA_DataRetrieval( "Nitrate", "Nitrogen" ), - startDate = "2020-05-01" + startDate = "2020-05-01", + ask = FALSE ) # example for AK project -tada7 <- TADA_DataRetrieval(project = "Anchorage Bacteria 20-21") +tada7 <- TADA_DataRetrieval(project = "Anchorage Bacteria 20-21", ask = FALSE) # another example for AK tada8 <- TADA_DataRetrieval( @@ -165,7 +168,8 @@ tada8 <- TADA_DataRetrieval( "Nitrate", "Nitrogen" ), - startDate = "2018-05-01" + startDate = "2018-05-01", + ask = FALSE ) # example for tribes @@ -208,7 +212,8 @@ tada9 <- TADA_DataRetrieval(organization = c( "CHOCNATWQX", "WNENVDPT_WQX", "PUEBLO_POJOAQUE" -)) +), +ask = FALSE) # query only NWIS data for a 10 year period in CT tada10 <- TADA_DataRetrieval( @@ -217,13 +222,15 @@ tada10 <- TADA_DataRetrieval( sampleMedia = c("Water", "water"), statecode = "CT", # consider downloading only 1 state at a time providers = "NWIS", - applyautoclean = FALSE + applyautoclean = FALSE, + ask = FALSE ) # 
query by country code (e.g. Canada, countrycode = "CA") tada11 <- TADA_DataRetrieval( startDate = "2015-01-01", - countrycode = "CA" + countrycode = "CA", + ask = FALSE ) } diff --git a/man/TADA_FindPotentialDuplicatesMultipleOrgs.Rd b/man/TADA_FindPotentialDuplicatesMultipleOrgs.Rd index 8130dcc1..f30abeda 100644 --- a/man/TADA_FindPotentialDuplicatesMultipleOrgs.Rd +++ b/man/TADA_FindPotentialDuplicatesMultipleOrgs.Rd @@ -53,7 +53,7 @@ TADA_FindPotentialDuplicatesSingleOrg. \examples{ \dontrun{ # Load dataset -dat <- TADA_DataRetrieval(startDate = "2022-09-01", endDate = "2023-05-01", statecode = "PA", sampleMedia = "Water") +dat <- TADA_DataRetrieval(startDate = "2022-09-01", endDate = "2023-05-01", statecode = "PA", sampleMedia = "Water", ask = FALSE) unique(dat$OrganizationIdentifier) # If duplicates across organizations exist, pick the result belonging to "21PA_WQX" if available. dat1 <- TADA_FindPotentialDuplicatesMultipleOrgs(dat, dist_buffer = 100, org_hierarchy = c("21PA_WQX")) diff --git a/man/TADA_FlagContinuousData.Rd b/man/TADA_FlagContinuousData.Rd index c4488a86..c0de68ab 100644 --- a/man/TADA_FlagContinuousData.Rd +++ b/man/TADA_FlagContinuousData.Rd @@ -55,7 +55,7 @@ attachment at the activity level. 
} \examples{ \dontrun{ -all_data <- TADA_DataRetrieval(project = c("Continuous LC1", "MA_Continuous", "Anchorage Bacteria 20-21")) +all_data <- TADA_DataRetrieval(project = c("Continuous LC1", "MA_Continuous", "Anchorage Bacteria 20-21"), ask = FALSE) # Flag continuous data in new column titled "TADA.ContinuousData.Flag" all_data_flags <- TADA_FlagContinuousData(all_data, clean = FALSE) diff --git a/man/TADA_GetATTAINS.Rd b/man/TADA_GetATTAINS.Rd index 4aaa743d..341d17d6 100644 --- a/man/TADA_GetATTAINS.Rd +++ b/man/TADA_GetATTAINS.Rd @@ -40,7 +40,8 @@ tada_data <- TADA_DataRetrieval( endDate = "2018-07-31", characteristicName = "pH", statecode = "IL", - applyautoclean = TRUE + applyautoclean = TRUE, + ask = FALSE ) # note: these example ATTAINS data retrieval queries below may take a long time (10+ minutes) to run diff --git a/man/TADA_MakeSpatial.Rd b/man/TADA_MakeSpatial.Rd index 54e9cb04..0a0735f1 100644 --- a/man/TADA_MakeSpatial.Rd +++ b/man/TADA_MakeSpatial.Rd @@ -27,7 +27,8 @@ tada_not_spatial <- TADA_DataRetrieval( characteristicName = "pH", statecode = "SC", countycode = "Abbeville", - applyautoclean = TRUE + applyautoclean = TRUE, + ask = FALSE ) # make `tada_not_spatial` an sf object, projected in crs = 4269 (NAD83) diff --git a/man/TADA_PairForCriteriaCalc.Rd b/man/TADA_PairForCriteriaCalc.Rd index 83e893ba..83520a48 100644 --- a/man/TADA_PairForCriteriaCalc.Rd +++ b/man/TADA_PairForCriteriaCalc.Rd @@ -36,7 +36,8 @@ combinations for hardness, pH, temperature, salinity and chloride will be used. AL_df <- TADA_DataRetrieval( startDate = "2010-11-30", endDate = "2010-12-01", - statecode = "AL" + statecode = "AL", + ask = FALSE ) AL_PairRef <- TADA_PairForCriteriaCalc(AL_df) diff --git a/man/TADA_SubstituteDeprecatedChars.Rd b/man/TADA_SubstituteDeprecatedChars.Rd index a53a3aa9..e5b06176 100644 --- a/man/TADA_SubstituteDeprecatedChars.Rd +++ b/man/TADA_SubstituteDeprecatedChars.Rd @@ -17,7 +17,7 @@ TADA.CharacteristicName column. 
Original columns are unchanged. This function uses the WQX Characteristic domain table to substitute deprecated (i.e. retired and/or suspect) Characteristic Names with the new name in the TADA.CharacteristicName column. TADA_SubstituteDeprecatedChars is -run within TADA_AutoClean, which runs within TADA_DataRetreival and (if autoclean = TRUE) +run within TADA_AutoClean, which runs within TADA_DataRetrieval and (if autoclean = TRUE) in TADA_BigDataRetrieval. Therefore, deprecated characteristic names are harmonized to the new name automatically upon data retrieval. TADA_SubstituteDeprecatedChars can also be used by itself on a user supplied @@ -32,7 +32,7 @@ deprecated names (Char_Flag). This can be used as a crosswalk between the deprec \examples{ \dontrun{ # download nutrient data in MT from 2022 and set autoclean = FALSE -df <- TADA_DataRetrieval(startDate = "2022-01-01", endDate = "2022-12-31", characteristicType = "Nutrient", statecode = "MT", applyautoclean = FALSE) +df <- TADA_DataRetrieval(startDate = "2022-01-01", endDate = "2022-12-31", characteristicType = "Nutrient", statecode = "MT", applyautoclean = FALSE, ask = FALSE) df2 <- TADA_SubstituteDeprecatedChars(df) # in this example, "Inorganic nitrogen (nitrate and nitrite)" is a USGS NWIS characteristic that is # deprecated and "Phosphate-phosphorus***retired***use Total Phosphorus, mixed forms" is a deprecated WQX @@ -41,7 +41,7 @@ df2 <- TADA_SubstituteDeprecatedChars(df) unique(df2$CharacteristicName) unique(df2$TADA.CharacteristicName) -df3 <- TADA_DataRetrieval(startDate = "2022-01-01", endDate = "2022-12-31", characteristicType = "Nutrient", statecode = "WY", applyautoclean = FALSE) +df3 <- TADA_DataRetrieval(startDate = "2022-01-01", endDate = "2022-12-31", characteristicType = "Nutrient", statecode = "WY", applyautoclean = FALSE, ask = FALSE) df4 <- TADA_SubstituteDeprecatedChars(df3) unique(df4$CharacteristicName) unique(df4$TADA.CharacteristicName) diff --git a/man/TADA_ViewATTAINS.Rd 
b/man/TADA_ViewATTAINS.Rd index 05f62503..adee99e9 100644 --- a/man/TADA_ViewATTAINS.Rd +++ b/man/TADA_ViewATTAINS.Rd @@ -29,7 +29,8 @@ tada_data <- TADA_DataRetrieval( endDate = "1995-12-31", characteristicName = "pH", statecode = "NV", - applyautoclean = TRUE + applyautoclean = TRUE, + ask = FALSE ) attains_data <- TADA_GetATTAINS(tada_data, return_sf = TRUE) diff --git a/man/fetchATTAINS.Rd b/man/fetchATTAINS.Rd index 802b055a..959fa750 100644 --- a/man/fetchATTAINS.Rd +++ b/man/fetchATTAINS.Rd @@ -24,7 +24,8 @@ tada_data <- TADA_DataRetrieval( endDate = "1995-12-31", characteristicName = "pH", statecode = "NV", - applyautoclean = TRUE + applyautoclean = TRUE, + ask = FALSE ) nv_attains_features <- fetchATTAINS(tada_data, catchments_only = FALSE) diff --git a/man/fetchNHD.Rd b/man/fetchNHD.Rd index e431af6e..9849185a 100644 --- a/man/fetchNHD.Rd +++ b/man/fetchNHD.Rd @@ -26,7 +26,8 @@ tada_data <- TADA_DataRetrieval( endDate = "1990-01-15", characteristicName = "pH", statecode = "CO", - applyautoclean = TRUE + applyautoclean = TRUE, + ask = FALSE ) nhd_data <- fetchNHD(.data = tada_data, resolution = "Hi", features = c("catchments", "waterbodies", "flowlines")) From dfd0b3c35f2533995e4ede558668b4f0b0ff9a6f Mon Sep 17 00:00:00 2001 From: Matt Brousil <37380883+mbrousil@users.noreply.github.com> Date: Fri, 24 Jan 2025 10:20:27 -0800 Subject: [PATCH 23/35] Remove bigdataretrieval --- R/DataDiscoveryRetrieval.R | 289 ------------------------------------- 1 file changed, 289 deletions(-) diff --git a/R/DataDiscoveryRetrieval.R b/R/DataDiscoveryRetrieval.R index 6688cd8c..b703efe1 100644 --- a/R/DataDiscoveryRetrieval.R +++ b/R/DataDiscoveryRetrieval.R @@ -1174,295 +1174,6 @@ TADA_BigDataHelper <- function(record_summary, WQPquery, maxrecs = 250000, maxsi return(df_out) } -#' Large WQP data pulls using dataRetrieval -#' -#' This function does multiple synchronous data calls to the WQP -#' (waterqualitydata.us). 
It uses the WQP summary service to limit the amount -#' downloaded to only relevant data (based on user query), pulls back data for -#' 250000 records at a time, and then joins the data back together to produce a -#' single TADA compatible dataframe as the output. For large data sets, that can save a lot -#' of time and ultimately reduce the complexity of subsequent data processing. -#' Using this function, you will be able to download all data available from all -#' sites in the contiguous United States available for the time period, -#' characteristicName, and siteType requested. Computer memory may limit the -#' size of data frames that your R console will be able to hold in one session. -#' Function requires a characteristicName, siteType, statecode, huc, or start/ -#' end date input. The recommendation is to be as specific as you can with your -#' large data call. The function allows the user to run TADA_AutoClean on the data frame, -#' but this is not the default as checking large dataframes for exact duplicate -#' rows can be time consuming and is better performed on its own once the query is -#' completed. -#' -#' Some code for this function was adapted from this USGS Blog (Author: Aliesha Krall) -#' \href{https://waterdata.usgs.gov/blog/large_sample_pull/}{Large Sample Pull} -#' -#' See ?TADA_AutoClean documentation for more information on this optional input. -#' -#' Note: TADA_BigDataRetrieval (by leveraging USGS's dataRetrieval), automatically converts -#' the date times to UTC. It also automatically converts the data to dates, -#' datetimes, numerics based on a standard algorithm. See: ?dataRetrieval::readWQPdata -#' -#' @param startDate Start Date string in the format YYYY-MM-DD, for example, "2020-01-01" -#' @param endDate End Date string in the format YYYY-MM-DD, for example, "2020-01-01" -#' @param countrycode Code that identifies a country or ocean (e.g. countrycode = "CA" for Canada, countrycode = "OA" for Atlantic Ocean). 
See https://www.waterqualitydata.us/Codes/countrycode for options. -#' @param statecode FIPS state alpha code that identifies a state (e.g. statecode = "DE" for Delaware). See https://www.waterqualitydata.us/Codes/statecode for options. -#' @param countycode FIPS county name. Note that a state code must also be supplied (e.g. statecode = "AL", countycode = "Chilton"). See https://www.waterqualitydata.us/Codes/countycode for options. -#' @param huc A numeric code denoting a hydrologic unit. Example: "04030202". Different size hucs can be entered. See https://epa.maps.arcgis.com/home/item.html?id=796992f4588c401fabec7446ecc7a5a3 for a map with HUCS. Click on a HUC to find the associated code. -#' @param siteid Unique monitoring location identifier. -#' @param siteType Type of waterbody. See https://www.waterqualitydata.us/Codes/sitetype for options. -#' @param characteristicName Name of parameter. See https://www.waterqualitydata.us/Codes/characteristicName for options. -#' @param characteristicType Groups of environmental measurements/parameters. See https://www.waterqualitydata.us/Codes/characteristicType for options. -#' @param sampleMedia Sampling substrate such as water, air, or sediment. See https://www.waterqualitydata.us/Codes/sampleMedia for options. -#' @param organization A string of letters and/or numbers (some additional characters also possible) used to signify an organization with data in the Water Quality Portal. See https://www.waterqualitydata.us/Codes/organization for options. -#' @param maxrecs The maximum number of results queried within one call to dataRetrieval. -#' @param applyautoclean Logical, defaults to FALSE. If TRUE, runs TADA_AutoClean function on the returned data profile. 
-#' -#' @return TADA-compatible dataframe -#' -#' @export -#' -#' @examples -#' \dontrun{ -#' # takes approx 3 mins to run -#' tada1 <- TADA_BigDataRetrieval(startDate = "2019-01-01", endDate = "2021-12-31", characteristicName = "Temperature, water", statecode = c("AK", "AL")) -#' -#' # takes approx 21 mins -#' tada2 <- TADA_BigDataRetrieval(startDate = "2016-10-01", endDate = "2022-09-30", statecode = "UT") -#' -#' # takes seconds to run -#' tada3 <- TADA_BigDataRetrieval(huc = "04030202", characteristicName = "Escherichia coli") -#' -#' # takes approx 3 mins to run -#' tada4 <- TADA_BigDataRetrieval(startDate = "2004-01-01", countrycode = "CA") -#' -#' # takes seconds to run -#' tada5 <- TADA_BigDataRetrieval(startDate = "2018-01-01", statecode = "AL", countycode = "Chilton") -#' -#' # takes seconds to run -#' tada6 <- TADA_BigDataRetrieval(organization = "PUEBLOOFTESUQUE") -#' } -#' -TADA_BigDataRetrieval <- function(startDate = "null", - endDate = "null", - countrycode = "null", - statecode = "null", - countycode = "null", - huc = "null", - siteid = "null", - siteType = "null", - characteristicName = "null", - characteristicType = "null", - sampleMedia = "null", - organization = "null", - maxrecs = 250000, - applyautoclean = FALSE) { - start_T <- Sys.time() - - if (!"null" %in% statecode & !"null" %in% huc) { - stop("Please provide either state code(s) OR huc(s) to proceed.") - } - - if (!startDate == "null") { - startDat <- lubridate::ymd(startDate) - startYearLo <- lubridate::year(startDat) - } else { # else: pick a date before which any data are unlikely to be in WQP - startDate <- "1800-01-01" - startDat <- lubridate::ymd(startDate) - startYearLo <- lubridate::year(startDat) - } - - # Logic: if the input endDate is not null, convert to date and obtain year - # for summary - if (!endDate == "null") { - endDat <- lubridate::ymd(endDate) - endYearHi <- lubridate::year(endDat) - } else { # else: if not populated, default to using today's date/year for summary - 
endDate <- as.character(Sys.Date()) - endDat <- lubridate::ymd(endDate) - endYearHi <- lubridate::year(endDat) - } - - # Create readWQPsummary query - WQPquery <- list() - if (length(characteristicName) > 1) { - WQPquery <- c(WQPquery, characteristicName = list(characteristicName)) - } else if (characteristicName != "null") { - WQPquery <- c(WQPquery, characteristicName = characteristicName) - } - if (length(characteristicType) > 1) { - WQPquery <- c(WQPquery, characteristicType = list(characteristicType)) - } else if (characteristicType != "null") { - WQPquery <- c(WQPquery, characteristicType = characteristicType) - } - if (length(siteType) > 1) { - WQPquery <- c(WQPquery, siteType = list(siteType)) - } else if (siteType != "null") { - WQPquery <- c(WQPquery, siteType = siteType) - } - - if (!"null" %in% statecode) { - load(system.file("extdata", "statecodes_df.Rdata", package = "EPATADA")) - statecode <- as.character(statecode) - statecodes_sub <- statecodes_df %>% dplyr::filter(STUSAB %in% statecode) - statecd <- paste0("US:", statecodes_sub$STATE) - if (nrow(statecodes_sub) == 0) { - stop("State code is not valid. 
Check FIPS state/territory abbreviations.") - } - if (length(statecode) > 1) { - for (i in 1:length(statecode)) { - WQPquery <- c(WQPquery, statecode = list(statecd)) - } - WQPquery <- c(WQPquery, statecode = list(statecd)) - } else { - WQPquery <- c(WQPquery, statecode = statecd) - } - } - - if (length(huc) > 1) { - WQPquery <- c(WQPquery, huc = list(huc)) - } else if (huc != "null") { - WQPquery <- c(WQPquery, huc = huc) - } - - if (length(countrycode) > 1) { - WQPquery <- c(WQPquery, countrycode = list(countrycode)) - } else if (countrycode != "null") { - WQPquery <- c(WQPquery, countrycode = countrycode) - } - - if (length(countycode) > 1) { - WQPquery <- c(WQPquery, countycode = list(countycode)) - } else if (countycode != "null") { - WQPquery <- c(WQPquery, countycode = countycode) - } - - if (length(organization) > 1) { - WQPquery <- c(WQPquery, organization = list(organization)) - } else if (organization != "null") { - WQPquery <- c(WQPquery, organization = organization) - } - - # cut down on summary query time if possible based on big data query - diffdat <- lubridate::time_length(difftime(Sys.Date(), startDat), "years") - - if (diffdat <= 1) { - WQPquery <- c(WQPquery, summaryYears = 1) - } - - if (diffdat > 1 & diffdat <= 5) { - WQPquery <- c(WQPquery, summaryYears = 5) - } - - print("Building site summary table for chunking result downloads...") - df_summary <- dataRetrieval::readWQPsummary(WQPquery) - - ## NOTE: if query brings back no results, function returns empty - # dataRetrieval profile, not empty summary - if (nrow(df_summary) > 0) { - # narrow down to years of interest from summary - sites <- df_summary %>% - dplyr::filter( - YearSummarized >= startYearLo, - YearSummarized <= endYearHi - ) - - rm(df_summary) - # if there are still site records when filtered to years of interest.... 
- if (dim(sites)[1] > 0) { - # get total number of results per site and separate out sites with >250000 results - tot_sites <- sites %>% - dplyr::group_by(MonitoringLocationIdentifier) %>% - dplyr::summarise(tot_n = sum(ResultCount)) %>% - dplyr::arrange(tot_n) - smallsites <- tot_sites %>% dplyr::filter(tot_n <= maxrecs) - bigsites <- tot_sites %>% dplyr::filter(tot_n > maxrecs) - - df <- data.frame() - - if (dim(smallsites)[1] > 0) { - smallsitesgrp <- make_groups(smallsites, maxrecs) - - print(paste0("Downloading data from sites with fewer than ", maxrecs, " results by grouping them together.")) - - for (i in 1:max(smallsitesgrp$group)) { - site_chunk <- subset(smallsitesgrp$MonitoringLocationIdentifier, smallsitesgrp$group == i) - joins <- TADA_DataRetrieval( - startDate = startDate, - endDate = endDate, - siteid = site_chunk, - characteristicName = characteristicName, - characteristicType = characteristicType, - sampleMedia = sampleMedia, - applyautoclean = FALSE - ) - if (dim(joins)[1] > 0) { - df <- dplyr::bind_rows(df, joins) - } - } - - rm(smallsites, smallsitesgrp) - } - - if (dim(bigsites)[1] > 0) { - print(paste0("Downloading data from sites with greater than ", maxrecs, " results, chunking queries by shorter time intervals...")) - - bsitesvec <- unique(bigsites$MonitoringLocationIdentifier) - - for (i in 1:length(bsitesvec)) { - mlidsum <- subset(sites, sites$MonitoringLocationIdentifier == bsitesvec[i]) - mlidsum <- mlidsum %>% - dplyr::group_by(MonitoringLocationIdentifier, YearSummarized) %>% - dplyr::summarise(tot_n = sum(ResultCount)) - site_chunk <- unique(mlidsum$MonitoringLocationIdentifier) - - bigsitegrps <- make_groups(mlidsum, maxrecs) - - for (i in 1:max(bigsitegrps$group)) { - yearchunk <- subset(bigsitegrps$YearSummarized, bigsitegrps$group == i) - startD <- paste0(min(yearchunk), "-01-01") - endD <- paste0(max(yearchunk), "-12-31") - - joins <- TADA_DataRetrieval( - startDate = startD, - endDate = endD, - siteid = site_chunk, - 
characteristicName = characteristicName, - characteristicType = characteristicType, - sampleMedia = sampleMedia, - applyautoclean = FALSE - ) - - if (dim(joins)[1] > 0) { - df <- dplyr::bind_rows(df, joins) - } - } - } - rm(bigsites, bigsitegrps) - } - } else { - warning("Query returned no data. Function returns an empty dataframe.") - return(sites) - } - } else { - warning("Query returned no data. Function returns an empty dataframe.") - return(df_summary) - } - - df <- subset(df, as.Date(df$ActivityStartDate, "%Y-%m-%d") >= startDat & as.Date(df$ActivityStartDate, "%Y-%m-%d") <= endDat) - - if (applyautoclean == TRUE) { - print("Applying TADA_AutoClean function...") - df <- TADA_AutoClean(df) - } - - # timing function for efficiency tests. - difference <- difftime(Sys.time(), start_T, units = "mins") - print(difference) - - return(df) -} - - #' Join WQP Profiles #' From abe653fbc296a447c0504523bbcd3c01064fe0e1 Mon Sep 17 00:00:00 2001 From: Matt Brousil <37380883+mbrousil@users.noreply.github.com> Date: Fri, 24 Jan 2025 10:37:25 -0800 Subject: [PATCH 24/35] Apply suggestions from code review Co-authored-by: B Steele <32140074+steeleb@users.noreply.github.com> --- R/DataDiscoveryRetrieval.R | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/R/DataDiscoveryRetrieval.R b/R/DataDiscoveryRetrieval.R index b703efe1..b8a87e44 100644 --- a/R/DataDiscoveryRetrieval.R +++ b/R/DataDiscoveryRetrieval.R @@ -339,7 +339,7 @@ TADA_DataRetrieval <- function(startDate = "null", # If tribe info is provided then grab the corresponding sf object: if (tribal_area_type != "null") { # Make a reference table for tribal area type + url matching - # (options that don't return results are commented out for now) + # (options that don't return results are commented out) map_service_urls <- tibble::tribble( ~tribal_area, ~url, "Alaska Native Allotments", "https://geopub.epa.gov/arcgis/rest/services/EMEF/Tribal/MapServer/0", @@ -364,11 +364,9 @@ TADA_DataRetrieval <- 
function(startDate = "null", # These area types allow filtering by TRIBE_NAME (unique within each type) if (tribal_area_type %in% c( - # "Alaska Native Villages", "American Indian Reservations", "Off-reservation Trust Lands", - "Oklahoma Tribal Statistical Areas" # , - # "Virginia Federally Recognized Tribes" + "Oklahoma Tribal Statistical Areas" ) ) { # Get the relevant url @@ -405,7 +403,7 @@ TADA_DataRetrieval <- function(startDate = "null", } } } else { - stop("Tribal area type not recognized. Refer to TADA_TribalOptions() for query options.") + stop("Tribal area type or tribal name parcel not recognized. Refer to TADA_TribalOptions() for query options.") } } @@ -957,7 +955,7 @@ TADA_TribalOptions <- function(tribal_area_type, return_sf = FALSE) { # Convert to df if needed, export if (return_sf == FALSE) { return( - as.data.frame(tribal_area_sf) %>% + tribal_area_sf %>% sf::st_drop_geometry() ) } else { From acab23e03e3f1244d77b68786938d888fb383b2e Mon Sep 17 00:00:00 2001 From: Matt Brousil <37380883+mbrousil@users.noreply.github.com> Date: Fri, 24 Jan 2025 11:58:55 -0800 Subject: [PATCH 25/35] Documentation and messaging --- R/DataDiscoveryRetrieval.R | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/R/DataDiscoveryRetrieval.R b/R/DataDiscoveryRetrieval.R index b8a87e44..fe06f0d0 100644 --- a/R/DataDiscoveryRetrieval.R +++ b/R/DataDiscoveryRetrieval.R @@ -46,8 +46,8 @@ #' @param huc A numeric code denoting a hydrologic unit. Example: "04030202". Different size hucs can be entered. See https://epa.maps.arcgis.com/home/item.html?id=796992f4588c401fabec7446ecc7a5a3 for a map with HUCS. Click on a HUC to find the associated code. #' @param siteid Unique monitoring location identifier. #' @param siteType Type of waterbody. See https://www.waterqualitydata.us/Codes/sitetype for options. 
-#' @param tribal_area_type One of the six tribal spatial layers: "Alaska Native Allotments", "Alaska Native Villages", "American Indian Reservations", "Off-reservation Trust Lands", "Oklahoma Tribal Statistical Areas", or "Virginia Federally Recognized Tribes". -#' @param tribe_name_parcel The name of a tribe corresponding to an entry in the TRIBE_NAME field of the specified tribal_area_type. OR if the type is Alaska Native Allotments" then the corresponding PARCEL_NO. +#' @param tribal_area_type One of four tribal spatial layers: "Alaska Native Allotments", "American Indian Reservations", "Off-reservation Trust Lands", or "Oklahoma Tribal Statistical Areas". More info in TADA_TribalOptions(). Note that "Alaska Native Villages" and "Virginia Federally Recognized Tribes" layers will not return a successful query. +#' @param tribe_name_parcel The name of a tribe corresponding to an entry in the TRIBE_NAME field of the specified tribal_area_type. OR if the type is "Alaska Native Allotments" then the corresponding PARCEL_NO. More info in TADA_TribalOptions(). #' @param characteristicName Name of parameter. See https://www.waterqualitydata.us/Codes/characteristicName for options. #' @param characteristicType Groups of environmental measurements/parameters. See https://www.waterqualitydata.us/Codes/characteristicType for options. #' @param sampleMedia Sampling substrate such as water, air, or sediment. See https://www.waterqualitydata.us/Codes/sampleMedia for options. 
@@ -254,7 +254,12 @@ TADA_DataRetrieval <- function(startDate = "null", ) } - # Insufficient tribal info provided + # Insufficient tribal info provided: + # Type but no parcel + if ((tribal_area_type != "null") & all(tribe_name_parcel == "null")) { + stop("A tribe_name_parcel is required if tribal_area_type is provided.") + } + # Parcel but no type if ((tribal_area_type == "null") & all(tribe_name_parcel != "null")) { stop("A tribal_area_type is required if tribe_name_parcel is provided.") } @@ -423,6 +428,8 @@ TADA_DataRetrieval <- function(startDate = "null", quiet_whatWQPdata <- purrr::quietly(dataRetrieval::whatWQPdata) # Try getting WQP info + message("Checking what data is available. This may take a moment.") + quiet_bbox_avail <- quiet_whatWQPdata( WQPquery, bBox = c(input_bbox$xmin, input_bbox$ymin, input_bbox$xmax, input_bbox$ymax) @@ -743,6 +750,8 @@ TADA_DataRetrieval <- function(startDate = "null", } # Query info on available data + message("Checking what data is available. This may take a moment.") + # Don't want to print every message that's returned by WQP quiet_whatWQPdata <- purrr::quietly(dataRetrieval::whatWQPdata) From 4b3ec7f0b608c7ba0c05df16e4f72ab6c6ae728a Mon Sep 17 00:00:00 2001 From: Matt Brousil <37380883+mbrousil@users.noreply.github.com> Date: Fri, 24 Jan 2025 12:14:56 -0800 Subject: [PATCH 26/35] Clarify tribe_name_parcel reqs --- R/DataDiscoveryRetrieval.R | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/R/DataDiscoveryRetrieval.R b/R/DataDiscoveryRetrieval.R index fe06f0d0..5df7f847 100644 --- a/R/DataDiscoveryRetrieval.R +++ b/R/DataDiscoveryRetrieval.R @@ -47,7 +47,7 @@ #' @param siteid Unique monitoring location identifier. #' @param siteType Type of waterbody. See https://www.waterqualitydata.us/Codes/sitetype for options. 
#' @param tribal_area_type One of four tribal spatial layers: "Alaska Native Allotments", "American Indian Reservations", "Off-reservation Trust Lands", or "Oklahoma Tribal Statistical Areas". More info in TADA_TribalOptions(). Note that "Alaska Native Villages" and "Virginia Federally Recognized Tribes" layers will not return a successful query. -#' @param tribe_name_parcel The name of a tribe corresponding to an entry in the TRIBE_NAME field of the specified tribal_area_type. OR if the type is "Alaska Native Allotments" then the corresponding PARCEL_NO. More info in TADA_TribalOptions(). +#' @param tribe_name_parcel The name of one or more tribes corresponding to entries in the TRIBE_NAME field of the specified tribal_area_type. OR if the type is "Alaska Native Allotments" then the corresponding PARCEL_NO. More info in TADA_TribalOptions(). #' @param characteristicName Name of parameter. See https://www.waterqualitydata.us/Codes/characteristicName for options. #' @param characteristicType Groups of environmental measurements/parameters. See https://www.waterqualitydata.us/Codes/characteristicType for options. #' @param sampleMedia Sampling substrate such as water, air, or sediment. See https://www.waterqualitydata.us/Codes/sampleMedia for options. 
@@ -206,6 +206,12 @@ TADA_DataRetrieval <- function(startDate = "null", maxrecs = 250000, ask = TRUE, applyautoclean = TRUE) { + + # Require one tribal area type: + if (length(tribal_area_type == "null") >1) { + stop("The tribal_area_type argument only accepts a single value, but multiple have been provided.") + } + # Check for incomplete or inconsistent inputs: # If both an sf object and tribe information are provided it's unclear what @@ -263,7 +269,7 @@ TADA_DataRetrieval <- function(startDate = "null", if ((tribal_area_type == "null") & all(tribe_name_parcel != "null")) { stop("A tribal_area_type is required if tribe_name_parcel is provided.") } - + # If an sf object OR tribal info are provided they will be the basis of the query # (The tribal data handling uses sf objects as well) if ((!is.null(aoi_sf) & inherits(aoi_sf, "sf")) | (tribal_area_type != "null")) { From c3e40e31c56d29348f5da74986180b3b0710c747 Mon Sep 17 00:00:00 2001 From: Matt Brousil <37380883+mbrousil@users.noreply.github.com> Date: Fri, 24 Jan 2025 12:19:22 -0800 Subject: [PATCH 27/35] Redo tribal_area_type check --- R/DataDiscoveryRetrieval.R | 303 ++++++++++++++++++------------------- 1 file changed, 149 insertions(+), 154 deletions(-) diff --git a/R/DataDiscoveryRetrieval.R b/R/DataDiscoveryRetrieval.R index 5df7f847..c658aa5f 100644 --- a/R/DataDiscoveryRetrieval.R +++ b/R/DataDiscoveryRetrieval.R @@ -208,16 +208,16 @@ TADA_DataRetrieval <- function(startDate = "null", applyautoclean = TRUE) { # Require one tribal area type: - if (length(tribal_area_type == "null") >1) { - stop("The tribal_area_type argument only accepts a single value, but multiple have been provided.") + if (length(tribal_area_type) > 1) { + stop("tribal_area_type must be of length 1.") } # Check for incomplete or inconsistent inputs: - + # If both an sf object and tribe information are provided it's unclear what # the priority should be for the query if (!is.null(aoi_sf) & - any((tribal_area_type != "null") | 
(tribe_name_parcel != "null"))) { + any((tribal_area_type != "null") | (tribe_name_parcel != "null"))) { stop( paste0( "Both sf data and tribal information have been provided. ", @@ -225,17 +225,17 @@ TADA_DataRetrieval <- function(startDate = "null", ) ) } - + # Check for other arguments that indicate location. Function will ignore # these inputs but warn the user if ( # sf object provided (!is.null(aoi_sf) & inherits(aoi_sf, "sf")) & - # with additional location info - any( - (countrycode != "null"), (countycode != "null"), (huc != "null"), - (siteid != "null"), (statecode != "null") - ) + # with additional location info + any( + (countrycode != "null"), (countycode != "null"), (huc != "null"), + (siteid != "null"), (statecode != "null") + ) ) { warning( paste0( @@ -246,11 +246,11 @@ TADA_DataRetrieval <- function(startDate = "null", } else if ( # Tribe info provided (tribal_area_type != "null") & - # with additional location info - any( - (countrycode != "null"), (countycode != "null"), (huc != "null"), - (siteid != "null"), (statecode != "null") - ) + # with additional location info + any( + (countrycode != "null"), (countycode != "null"), (huc != "null"), + (siteid != "null"), (statecode != "null") + ) ) { warning( paste0( @@ -259,7 +259,7 @@ TADA_DataRetrieval <- function(startDate = "null", ) ) } - + # Insufficient tribal info provided: # Type but no parcel if ((tribal_area_type != "null") & all(tribe_name_parcel == "null")) { @@ -274,10 +274,10 @@ TADA_DataRetrieval <- function(startDate = "null", # (The tribal data handling uses sf objects as well) if ((!is.null(aoi_sf) & inherits(aoi_sf, "sf")) | (tribal_area_type != "null")) { # Build the non-sf part of the query: - + # Set query parameters WQPquery <- list() - + # StartDate if (length(startDate) > 1) { if (is.na(suppressWarnings(lubridate::parse_date_time(startDate[1], orders = "ymd")))) { @@ -344,9 +344,9 @@ TADA_DataRetrieval <- function(startDate = "null", } WQPquery <- c(WQPquery, endDate = endDate) 
} - + # sf AOI prep for query - + # If tribe info is provided then grab the corresponding sf object: if (tribal_area_type != "null") { # Make a reference table for tribal area type + url matching @@ -360,19 +360,14 @@ TADA_DataRetrieval <- function(startDate = "null", "Oklahoma Tribal Statistical Areas", "https://geopub.epa.gov/arcgis/rest/services/EMEF/Tribal/MapServer/4" # , # "Virginia Federally Recognized Tribes", "https://geopub.epa.gov/arcgis/rest/services/EMEF/Tribal/MapServer/5" ) - - # Keep to a single type: - if (length(tribal_area_type) > 1) { - stop("tribal_area_type must be of length 1.") - } - + # These two layers will not return any data when used for bboxes if (tribal_area_type == "Alaska Native Villages") { stop("Alaska Native Villages data are centroid points, not spatial boundaries.") } else if (tribal_area_type == "Virginia Federally Recognized Tribes") { stop("Federally recognized tribal entities in Virginia do not have any available spatial boundaries.") } - + # These area types allow filtering by TRIBE_NAME (unique within each type) if (tribal_area_type %in% c( "American Indian Reservations", @@ -397,7 +392,7 @@ TADA_DataRetrieval <- function(startDate = "null", . } } - + # Otherwise filter by PARCEL_NO (Note that values in this col are not unique) } else if (tribal_area_type == "Alaska Native Allotments") { aoi_sf <- dplyr::filter( @@ -417,22 +412,22 @@ TADA_DataRetrieval <- function(startDate = "null", stop("Tribal area type or tribal name parcel not recognized. 
Refer to TADA_TribalOptions() for query options.") } } - + # Check and/or fix geometry aoi_sf <- sf::st_make_valid(aoi_sf) - + # Match CRS if (sf::st_crs(aoi_sf) != 4326) { aoi_sf <- sf::st_transform(aoi_sf, crs = 4326) } - + # Get bbox of the sf object input_bbox <- sf::st_bbox(aoi_sf) - + # Query info on available data within the bbox # Don't want to print every message that's returned by WQP quiet_whatWQPdata <- purrr::quietly(dataRetrieval::whatWQPdata) - + # Try getting WQP info message("Checking what data is available. This may take a moment.") @@ -440,63 +435,63 @@ TADA_DataRetrieval <- function(startDate = "null", WQPquery, bBox = c(input_bbox$xmin, input_bbox$ymin, input_bbox$xmax, input_bbox$ymax) ) - + # Alert & stop if an http error was received if (is.null(quiet_bbox_avail$result)) { stop_message <- quiet_bbox_avail$messages %>% grep(pattern = "failed|HTTP", x = ., ignore.case = FALSE, value = TRUE) %>% paste("\n", ., collapse = "") %>% paste("The WQP request returned a NULL with the following message(s): \n", - ., - collapse = "\n" + ., + collapse = "\n" ) - + stop(stop_message) } - + # Use result only bbox_avail <- quiet_bbox_avail$result - + # Check if any sites are within the aoi if ((nrow(bbox_avail) > 0) == FALSE) { stop("No monitoring sites were returned within your area of interest (no data available).") } - - + + quiet_whatWQPsites <- purrr::quietly(dataRetrieval::whatWQPsites) - + quiet_bbox_sites <- quiet_whatWQPsites( siteid = bbox_avail$MonitoringLocationIdentifier ) - + if (is.null(quiet_bbox_sites$result)) { stop_message <- quiet_bbox_sites$messages %>% grep(pattern = "failed|HTTP", x = ., ignore.case = FALSE, value = TRUE) %>% paste("\n", ., collapse = "") %>% paste("The WQP request returned a NULL with the following message(s): \n", - ., - collapse = "\n" + ., + collapse = "\n" ) stop(stop_message) } - + # Reformat returned info as sf bbox_sites_sf <- TADA_MakeSpatial(quiet_bbox_sites$result, crs = 4326) - + # Subset sites to only 
within shapefile and get IDs clipped_sites_sf <- bbox_sites_sf[aoi_sf, ] - + clipped_site_ids <- clipped_sites_sf$MonitoringLocationIdentifier - + record_count <- bbox_avail %>% dplyr::filter(MonitoringLocationIdentifier %in% clipped_site_ids) %>% dplyr::pull(resultCount) %>% sum() - + # Should we proceed with downloads? If ask == TRUE then ask the user. if (ask == TRUE) { user_decision <- ask_user(n_records = record_count) - + # Act on input if (user_decision == "yes") { print("Proceeding with download.") @@ -504,10 +499,10 @@ TADA_DataRetrieval <- function(startDate = "null", stop("Cancelled by user.", call. = FALSE) } } - + # Continue now with site count site_count <- length(clipped_site_ids) - + # Check for either more than 300 sites or more records than max_recs. # If either is true then we'll approach the pull as a "big data" pull if (site_count > 300 | record_count > maxrecs) { @@ -517,7 +512,7 @@ TADA_DataRetrieval <- function(startDate = "null", "If your AOI is a county, state, country, or HUC boundary it would be more efficient to provide a code instead of an sf object." 
) ) - + # Use helper function to download large data volume results.DR <- withCallingHandlers( TADA_BigDataHelper( @@ -530,11 +525,11 @@ TADA_DataRetrieval <- function(startDate = "null", ), message = function(m) message(m$message) ) - - + + rm(bbox_avail, bbox_sites_sf) gc() - + # Check if any results were returned if ((nrow(results.DR) > 0) == FALSE) { print( @@ -552,7 +547,7 @@ TADA_DataRetrieval <- function(startDate = "null", sites.DR <- clipped_sites_sf %>% dplyr::as_tibble() %>% dplyr::select(-geometry) - + # Get project metadata projects.DR <- suppressMessages( dataRetrieval::readWQPdata( @@ -562,7 +557,7 @@ TADA_DataRetrieval <- function(startDate = "null", service = "Project" ) ) - + # Join results, sites, projects TADAprofile <- TADA_JoinWQPProfiles( FullPhysChem = results.DR, @@ -571,25 +566,25 @@ TADA_DataRetrieval <- function(startDate = "null", ) %>% dplyr::mutate( dplyr::across(tidyselect::everything(), as.character) ) - + # run TADA_AutoClean function if (applyautoclean == TRUE) { print("Data successfully downloaded. Running TADA_AutoClean function.") - + TADAprofile.clean <- TADA_AutoClean(TADAprofile) } else { TADAprofile.clean <- TADAprofile } } - + return(TADAprofile.clean) - + # Doesn't meet "big data" threshold: } else { # Retrieve all 3 profiles print("Downloading WQP query results. 
This may take some time depending upon the query size.") print(WQPquery) - + # Get results results.DR <- suppressMessages( dataRetrieval::readWQPdata( @@ -599,7 +594,7 @@ TADA_DataRetrieval <- function(startDate = "null", ignore_attributes = TRUE ) ) - + # Check if any results were returned if ((nrow(results.DR) > 0) == FALSE) { paste0( @@ -614,7 +609,7 @@ TADA_DataRetrieval <- function(startDate = "null", sites.DR <- clipped_sites_sf %>% dplyr::as_tibble() %>% dplyr::select(-geometry) - + # Get project metadata projects.DR <- suppressMessages( dataRetrieval::readWQPdata( @@ -624,7 +619,7 @@ TADA_DataRetrieval <- function(startDate = "null", service = "Project" ) ) - + # Join results, sites, projects TADAprofile <- TADA_JoinWQPProfiles( FullPhysChem = results.DR, @@ -633,25 +628,25 @@ TADA_DataRetrieval <- function(startDate = "null", ) %>% dplyr::mutate( dplyr::across(tidyselect::everything(), as.character) ) - + # Run TADA_AutoClean function if (applyautoclean == TRUE) { print("Data successfully downloaded. Running TADA_AutoClean function.") - + TADAprofile.clean <- TADA_AutoClean(TADAprofile) } else { TADAprofile.clean <- TADAprofile } } - + return(TADAprofile.clean) } - + # If no sf object provided: } else { # Set query parameters WQPquery <- list() - + if (!"null" %in% statecode) { load(system.file("extdata", "statecodes_df.Rdata", package = "EPATADA")) statecode <- as.character(statecode) @@ -664,13 +659,13 @@ TADA_DataRetrieval <- function(startDate = "null", WQPquery <- c(WQPquery, statecode = list(statecd)) } } - + if (length(huc) > 1) { WQPquery <- c(WQPquery, huc = list(huc)) } else if (huc != "null") { WQPquery <- c(WQPquery, huc = huc) } - + if (length(startDate) > 1) { if (is.na(suppressWarnings(lubridate::parse_date_time(startDate[1], orders = "ymd")))) { stop("Incorrect date format. 
Please use the format YYYY-MM-DD.") @@ -682,67 +677,67 @@ TADA_DataRetrieval <- function(startDate = "null", } WQPquery <- c(WQPquery, startDate = startDate) } - + if (length(countrycode) > 1) { WQPquery <- c(WQPquery, countrycode = list(countrycode)) } else if (countrycode != "null") { WQPquery <- c(WQPquery, countrycode = countrycode) } - + if (length(countycode) > 1) { WQPquery <- c(WQPquery, countycode = list(countycode)) } else if (countycode != "null") { WQPquery <- c(WQPquery, countycode = countycode) } - + if (length(siteid) > 1) { WQPquery <- c(WQPquery, siteid = list(siteid)) } else if (siteid != "null") { WQPquery <- c(WQPquery, siteid = siteid) } - + if (length(siteType) > 1) { WQPquery <- c(WQPquery, siteType = list(siteType)) } else if (siteType != "null") { WQPquery <- c(WQPquery, siteType = siteType) } - + if (length(characteristicName) > 1) { WQPquery <- c(WQPquery, characteristicName = list(characteristicName)) } else if (characteristicName != "null") { WQPquery <- c(WQPquery, characteristicName = characteristicName) } - + if (length(characteristicType) > 1) { WQPquery <- c(WQPquery, characteristicType = list(characteristicType)) } else if (characteristicType != "null") { WQPquery <- c(WQPquery, characteristicType = characteristicType) } - + if (length(sampleMedia) > 1) { WQPquery <- c(WQPquery, sampleMedia = list(sampleMedia)) } else if (sampleMedia != "null") { WQPquery <- c(WQPquery, sampleMedia = sampleMedia) } - + if (length(project) > 1) { WQPquery <- c(WQPquery, project = list(project)) } else if (project != "null") { WQPquery <- c(WQPquery, project = project) } - + if (length(providers) > 1) { WQPquery <- c(WQPquery, providers = list(providers)) } else if (providers != "null") { WQPquery <- c(WQPquery, providers = providers) } - + if (length(organization) > 1) { WQPquery <- c(WQPquery, organization = list(organization)) } else if (organization != "null") { WQPquery <- c(WQPquery, organization = organization) } - + if (length(endDate) > 1) 
{ if (is.na(suppressWarnings(lubridate::parse_date_time(endDate[1], orders = "ymd")))) { stop("Incorrect date format. Please use the format YYYY-MM-DD.") @@ -754,39 +749,39 @@ TADA_DataRetrieval <- function(startDate = "null", } WQPquery <- c(WQPquery, endDate = endDate) } - + # Query info on available data message("Checking what data is available. This may take a moment.") # Don't want to print every message that's returned by WQP quiet_whatWQPdata <- purrr::quietly(dataRetrieval::whatWQPdata) - + quiet_query_avail <- quiet_whatWQPdata(WQPquery) - + if (is.null(quiet_query_avail$result)) { stop_message <- quiet_query_avail$messages %>% grep(pattern = "failed|HTTP", x = ., ignore.case = FALSE, value = TRUE) %>% paste("\n", ., collapse = "") %>% paste("The WQP request returned a NULL with the following message(s): \n", - ., - collapse = "\n" + ., + collapse = "\n" ) - + stop(stop_message) } - + query_avail <- quiet_query_avail$result - + site_count <- length(query_avail$MonitoringLocationIdentifier) - + record_count <- query_avail %>% dplyr::pull(resultCount) %>% sum() - + # Should we proceed with downloads? If ask == TRUE then ask the user. if (ask == TRUE) { user_decision <- ask_user(n_records = record_count) - + # Act on input if (user_decision == "yes") { print("Proceeding with download.") @@ -794,14 +789,14 @@ TADA_DataRetrieval <- function(startDate = "null", stop("Cancelled by user.", call. = FALSE) } } - + # Check for either more than 300 sites or more records than max_recs. # If either is true then we'll approach the pull as a "big data" pull if (site_count > 300 | record_count > maxrecs) { message( "The number of sites and/or records matched by the query terms is large, so the download may take some time." 
) - + # Use helper function to download large data volume results.DR <- suppressMessages( TADA_BigDataHelper( @@ -812,17 +807,17 @@ TADA_DataRetrieval <- function(startDate = "null", maxsites = 300 ) ) - + rm(query_avail) gc() - + # Get site metadata sites.DR <- suppressMessages( dataRetrieval::whatWQPsites( siteid = unique(results.DR$MonitoringLocationIdentifier) ) ) - + # Get project metadata projects.DR <- suppressMessages( dataRetrieval::readWQPdata( @@ -832,7 +827,7 @@ TADA_DataRetrieval <- function(startDate = "null", service = "Project" ) ) - + # Join results, sites, projects TADAprofile <- TADA_JoinWQPProfiles( FullPhysChem = results.DR, @@ -841,18 +836,18 @@ TADA_DataRetrieval <- function(startDate = "null", ) %>% dplyr::mutate( dplyr::across(tidyselect::everything(), as.character) ) - + # run TADA_AutoClean function if (applyautoclean == TRUE) { print("Data successfully downloaded. Running TADA_AutoClean function.") - + TADAprofile.clean <- TADA_AutoClean(TADAprofile) } else { TADAprofile.clean <- TADAprofile } - + return(TADAprofile.clean) - + # If not a "big data" pull: } else { # Retrieve all 3 profiles @@ -860,25 +855,25 @@ TADA_DataRetrieval <- function(startDate = "null", print(WQPquery) results.DR <- suppressMessages( dataRetrieval::readWQPdata(WQPquery, - dataProfile = "resultPhysChem", - ignore_attributes = TRUE + dataProfile = "resultPhysChem", + ignore_attributes = TRUE ) ) - + # check if any results are available if ((nrow(results.DR) > 0) == FALSE) { print("Returning empty results dataframe: Your WQP query returned no results (no data available). Try a different query. 
Removing some of your query filters OR broadening your search area may help.") TADAprofile.clean <- results.DR } else { sites.DR <- suppressMessages(dataRetrieval::whatWQPsites(WQPquery)) - + projects.DR <- suppressMessages( dataRetrieval::readWQPdata(WQPquery, - ignore_attributes = TRUE, - service = "Project" + ignore_attributes = TRUE, + service = "Project" ) ) - + TADAprofile <- TADA_JoinWQPProfiles( FullPhysChem = results.DR, Sites = sites.DR, @@ -886,17 +881,17 @@ TADA_DataRetrieval <- function(startDate = "null", ) %>% dplyr::mutate( dplyr::across(tidyselect::everything(), as.character) ) - + # run TADA_AutoClean function if (applyautoclean == TRUE) { print("Data successfully downloaded. Running TADA_AutoClean function.") - + TADAprofile.clean <- TADA_AutoClean(TADAprofile) } else { TADAprofile.clean <- TADAprofile } } - + return(TADAprofile.clean) } } @@ -951,12 +946,12 @@ TADA_TribalOptions <- function(tribal_area_type, return_sf = FALSE) { "Oklahoma Tribal Statistical Areas", "https://geopub.epa.gov/arcgis/rest/services/EMEF/Tribal/MapServer/4", "Virginia Federally Recognized Tribes", "https://geopub.epa.gov/arcgis/rest/services/EMEF/Tribal/MapServer/5" ) - + # Confirm usable string provided if (!(tribal_area_type %in% map_service_urls$tribal_area)) { stop("tribal_area_type must match one of the six tribal spatial layer names.") } - + # Query Map Service tribal_area_sf <- dplyr::filter( map_service_urls, @@ -966,7 +961,7 @@ TADA_TribalOptions <- function(tribal_area_type, return_sf = FALSE) { # Return sf arcgislayers::arc_select() %>% sf::st_make_valid() - + # Convert to df if needed, export if (return_sf == FALSE) { return( @@ -1032,25 +1027,25 @@ TADA_ReadWQPWebServices <- function(webservice) { # read in csv from WQP web service if (grepl("zip=yes", webservice)) { webservice <- stringr::str_replace(webservice, "zip=yes", "zip=no") - + # download data webservice <- data.table::fread(toString(webservice)) - + # if input df was not downloaded using USGS's 
dataRetrieval, then the # column names will include / separators instead of . and TADA uses . # (e.g. ResultMeasure/MeasureUnitCode vs. ResultMeasure.MeasureUnitCode) colnames(webservice) <- gsub("/", ".", colnames(webservice)) - + return(webservice) } else { # download data webservice <- data.table::fread(toString(webservice)) - + # if input df was not downloaded using USGS's dataRetrieval, then the # column names will include / separators instead of . and TADA uses . # (e.g. ResultMeasure/MeasureUnitCode vs. ResultMeasure.MeasureUnitCode) colnames(webservice) <- gsub("/", ".", colnames(webservice)) - + return(webservice) } } @@ -1077,15 +1072,15 @@ TADA_BigDataHelper <- function(record_summary, WQPquery, maxrecs = 250000, maxsi dplyr::summarise(tot_n = sum(resultCount)) %>% dplyr::filter(tot_n > 0) %>% dplyr::arrange(tot_n) - + # Sites with less than/equal to maxrecs smallsites <- tot_sites %>% dplyr::filter(tot_n <= maxrecs) # Sites with more than maxrecs bigsites <- tot_sites %>% dplyr::filter(tot_n > maxrecs) - + df_small <- data.frame() df_big <- data.frame() - + # Work with small sites first: # Build download groups. Total record count limited to value of maxrecs. # Number of sites per download group limited to 300. @@ -1096,7 +1091,7 @@ TADA_BigDataHelper <- function(record_summary, WQPquery, maxrecs = 250000, maxsi threshold = maxrecs, maxgroupsize = 300 )) - + # Status update to user print( paste0( @@ -1105,9 +1100,9 @@ TADA_BigDataHelper <- function(record_summary, WQPquery, maxrecs = 250000, maxsi " results by grouping them together." 
) ) - + small_prog_bar <- txtProgressBar(min = 0, max = sum(smallsites$tot_n), style = 3) - + # Download the data for each group for (i in 1:max(smallsitesgrp$group)) { small_site_chunk <- subset( @@ -1124,22 +1119,22 @@ TADA_BigDataHelper <- function(record_summary, WQPquery, maxrecs = 250000, maxsi ) ) %>% dplyr::mutate(dplyr::across(everything(), as.character)) - + # If data is returned, stack with what's already been retrieved if (dim(results_small)[1] > 0) { df_small <- dplyr::bind_rows(df_small, results_small) } - + # Update progress setTxtProgressBar(pb = small_prog_bar, value = nrow(df_small)) } # Close progress bar when complete close(small_prog_bar) - + rm(smallsites, smallsitesgrp) gc() } - + # Large sites (>= maxrecs) next: if (dim(bigsites)[1] > 0) { print( @@ -1149,12 +1144,12 @@ TADA_BigDataHelper <- function(record_summary, WQPquery, maxrecs = 250000, maxsi " results, chunking queries by site." ) ) - + big_prog_bar <- txtProgressBar(min = 0, max = sum(bigsites$tot_n), style = 3) - + # Unique site IDs bsitesvec <- unique(bigsites$MonitoringLocationIdentifier) - + # For each site for (i in 1:length(bsitesvec)) { # Download each site's data individually @@ -1167,7 +1162,7 @@ TADA_BigDataHelper <- function(record_summary, WQPquery, maxrecs = 250000, maxsi ) ) %>% dplyr::mutate(dplyr::across(everything(), as.character)) - + if (dim(results_big)[1] > 0) { df_big <- dplyr::bind_rows(df_big, results_big) } @@ -1176,14 +1171,14 @@ TADA_BigDataHelper <- function(record_summary, WQPquery, maxrecs = 250000, maxsi } # Close progress bar when complete close(big_prog_bar) - + rm(bigsites) gc() } - - + + df_out <- dplyr::bind_rows(df_small, df_big) - + return(df_out) } @@ -1219,20 +1214,20 @@ TADA_JoinWQPProfiles <- function(FullPhysChem = "null", Sites = "null", Projects = "null") { FullPhysChem.df <- FullPhysChem - + Sites.df <- Sites - + Projects.df <- Projects - + # Join station data to full phys/chem (FullPhysChem.df) if (length(Sites.df > 1)) { if 
(nrow(Sites.df) > 0) { join1 <- FullPhysChem.df %>% # join stations to results dplyr::left_join(Sites.df, - by = "MonitoringLocationIdentifier", - multiple = "all", - relationship = "many-to-many" + by = "MonitoringLocationIdentifier", + multiple = "all", + relationship = "many-to-many" ) %>% # remove ".x" suffix from column names dplyr::rename_at(dplyr::vars(dplyr::ends_with(".x")), ~ stringr::str_replace(., "\\..$", "")) %>% @@ -1244,8 +1239,8 @@ TADA_JoinWQPProfiles <- function(FullPhysChem = "null", } else { join1 <- FullPhysChem.df } - - + + # Add QAPP columns from project if (length(Projects.df) > 1) { if (nrow(Projects.df) > 0) { @@ -1271,7 +1266,7 @@ TADA_JoinWQPProfiles <- function(FullPhysChem = "null", } else { join2 <- join1 } - + return(join2) } @@ -1292,7 +1287,7 @@ ask_user <- function(n_records) { "Would you like to continue with the download? [yes/no] ", sep = "" ) - + # Ask user if they want to continue & check for valid response while (TRUE) { user_input <- readline(prompt = user_prompt) @@ -1329,9 +1324,9 @@ make_groups <- function(x, maxrecs) { i <- i + 1 groupings <- plyr::rbind.fill(groupings, group) } - + x$group <- i - + groupings <- plyr::rbind.fill(groupings, x) } return(groupings) From f0de6b161e7b651944ec8b492eca54f22ee24fab Mon Sep 17 00:00:00 2001 From: Matt Brousil <37380883+mbrousil@users.noreply.github.com> Date: Fri, 24 Jan 2025 15:36:10 -0800 Subject: [PATCH 28/35] Catch clips with no sites --- R/DataDiscoveryRetrieval.R | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/R/DataDiscoveryRetrieval.R b/R/DataDiscoveryRetrieval.R index c658aa5f..0a98c553 100644 --- a/R/DataDiscoveryRetrieval.R +++ b/R/DataDiscoveryRetrieval.R @@ -261,7 +261,7 @@ TADA_DataRetrieval <- function(startDate = "null", } # Insufficient tribal info provided: - # Type but no parcel + # Type but no name or parcel if ((tribal_area_type != "null") & all(tribe_name_parcel == "null")) { stop("A tribe_name_parcel is required if 
tribal_area_type is provided.") } @@ -412,7 +412,7 @@ TADA_DataRetrieval <- function(startDate = "null", stop("Tribal area type or tribal name parcel not recognized. Refer to TADA_TribalOptions() for query options.") } } - + # Check and/or fix geometry aoi_sf <- sf::st_make_valid(aoi_sf) @@ -457,7 +457,6 @@ TADA_DataRetrieval <- function(startDate = "null", stop("No monitoring sites were returned within your area of interest (no data available).") } - quiet_whatWQPsites <- purrr::quietly(dataRetrieval::whatWQPsites) quiet_bbox_sites <- quiet_whatWQPsites( @@ -483,6 +482,11 @@ TADA_DataRetrieval <- function(startDate = "null", clipped_site_ids <- clipped_sites_sf$MonitoringLocationIdentifier + # Check if any sites are within the clip + if ((length(clipped_site_ids) > 0) == FALSE) { + stop("No monitoring sites were returned within your area of interest (no data available).") + } + record_count <- bbox_avail %>% dplyr::filter(MonitoringLocationIdentifier %in% clipped_site_ids) %>% dplyr::pull(resultCount) %>% @@ -837,7 +841,7 @@ TADA_DataRetrieval <- function(startDate = "null", dplyr::across(tidyselect::everything(), as.character) ) - # run TADA_AutoClean function + # Run TADA_AutoClean function if (applyautoclean == TRUE) { print("Data successfully downloaded. Running TADA_AutoClean function.") @@ -860,7 +864,7 @@ TADA_DataRetrieval <- function(startDate = "null", ) ) - # check if any results are available + # Check if any results are available if ((nrow(results.DR) > 0) == FALSE) { print("Returning empty results dataframe: Your WQP query returned no results (no data available). Try a different query. 
Removing some of your query filters OR broadening your search area may help.") TADAprofile.clean <- results.DR From cd05dfd4fd07102587ef5ec192148f68839f3d78 Mon Sep 17 00:00:00 2001 From: Matt Brousil <37380883+mbrousil@users.noreply.github.com> Date: Mon, 27 Jan 2025 13:08:43 -0800 Subject: [PATCH 29/35] More quietly --- R/DataDiscoveryRetrieval.R | 113 ++++++++++++++++++++++++++----------- vignettes/TADAModule1.Rmd | 38 +++++++------ 2 files changed, 100 insertions(+), 51 deletions(-) diff --git a/R/DataDiscoveryRetrieval.R b/R/DataDiscoveryRetrieval.R index 0a98c553..5e0490de 100644 --- a/R/DataDiscoveryRetrieval.R +++ b/R/DataDiscoveryRetrieval.R @@ -270,6 +270,12 @@ TADA_DataRetrieval <- function(startDate = "null", stop("A tribal_area_type is required if tribe_name_parcel is provided.") } + # Before proceeding make quiet wrappers for dataRetrieval functions for later + # use in if/else processes + quiet_whatWQPsites <- purrr::quietly(dataRetrieval::whatWQPsites) + quiet_whatWQPdata <- purrr::quietly(dataRetrieval::whatWQPdata) + quiet_readWQPdata <- purrr::quietly(dataRetrieval::readWQPdata) + # If an sf object OR tribal info are provided they will be the basis of the query # (The tribal data handling uses sf objects as well) if ((!is.null(aoi_sf) & inherits(aoi_sf, "sf")) | (tribal_area_type != "null")) { @@ -412,7 +418,7 @@ TADA_DataRetrieval <- function(startDate = "null", stop("Tribal area type or tribal name parcel not recognized. Refer to TADA_TribalOptions() for query options.") } } - + # Check and/or fix geometry aoi_sf <- sf::st_make_valid(aoi_sf) @@ -425,12 +431,11 @@ TADA_DataRetrieval <- function(startDate = "null", input_bbox <- sf::st_bbox(aoi_sf) # Query info on available data within the bbox - # Don't want to print every message that's returned by WQP - quiet_whatWQPdata <- purrr::quietly(dataRetrieval::whatWQPdata) # Try getting WQP info message("Checking what data is available. 
This may take a moment.") + # Don't want to print every message that's returned by WQP quiet_bbox_avail <- quiet_whatWQPdata( WQPquery, bBox = c(input_bbox$xmin, input_bbox$ymin, input_bbox$xmax, input_bbox$ymax) @@ -457,8 +462,6 @@ TADA_DataRetrieval <- function(startDate = "null", stop("No monitoring sites were returned within your area of interest (no data available).") } - quiet_whatWQPsites <- purrr::quietly(dataRetrieval::whatWQPsites) - quiet_bbox_sites <- quiet_whatWQPsites( siteid = bbox_avail$MonitoringLocationIdentifier ) @@ -553,15 +556,27 @@ TADA_DataRetrieval <- function(startDate = "null", dplyr::select(-geometry) # Get project metadata - projects.DR <- suppressMessages( - dataRetrieval::readWQPdata( - siteid = clipped_site_ids, - WQPquery, - ignore_attributes = TRUE, - service = "Project" - ) + quiet_projects.DR <- quiet_readWQPdata( + siteid = clipped_site_ids, + WQPquery, + ignore_attributes = TRUE, + service = "Project" ) + if (is.null(quiet_projects.DR$result)) { + stop_message <- quiet_projects.DR$messages %>% + grep(pattern = "failed|HTTP", x = ., ignore.case = FALSE, value = TRUE) %>% + paste("\n", ., collapse = "") %>% + paste("The WQP request returned a NULL with the following message(s): \n", + ., + collapse = "\n" + ) + + stop(stop_message) + } + + projects.DR <- quiet_projects.DR$result + # Join results, sites, projects TADAprofile <- TADA_JoinWQPProfiles( FullPhysChem = results.DR, @@ -615,15 +630,27 @@ TADA_DataRetrieval <- function(startDate = "null", dplyr::select(-geometry) # Get project metadata - projects.DR <- suppressMessages( - dataRetrieval::readWQPdata( - siteid = clipped_site_ids, - WQPquery, - ignore_attributes = TRUE, - service = "Project" - ) + quiet_projects.DR <- quiet_readWQPdata( + siteid = clipped_site_ids, + WQPquery, + ignore_attributes = TRUE, + service = "Project" ) + if (is.null(quiet_projects.DR$result)) { + stop_message <- quiet_projects.DR$messages %>% + grep(pattern = "failed|HTTP", x = ., ignore.case = 
FALSE, value = TRUE) %>% + paste("\n", ., collapse = "") %>% + paste("The WQP request returned a NULL with the following message(s): \n", + ., + collapse = "\n" + ) + + stop(stop_message) + } + + projects.DR <- quiet_projects.DR$result + # Join results, sites, projects TADAprofile <- TADA_JoinWQPProfiles( FullPhysChem = results.DR, @@ -758,8 +785,6 @@ TADA_DataRetrieval <- function(startDate = "null", message("Checking what data is available. This may take a moment.") # Don't want to print every message that's returned by WQP - quiet_whatWQPdata <- purrr::quietly(dataRetrieval::whatWQPdata) - quiet_query_avail <- quiet_whatWQPdata(WQPquery) if (is.null(quiet_query_avail$result)) { @@ -816,22 +841,44 @@ TADA_DataRetrieval <- function(startDate = "null", gc() # Get site metadata - sites.DR <- suppressMessages( - dataRetrieval::whatWQPsites( - siteid = unique(results.DR$MonitoringLocationIdentifier) - ) - ) + quiet_sites.DR <- quiet_whatWQPsites(siteid = unique(results.DR$MonitoringLocationIdentifier)) + + if (is.null(quiet_sites.DR$result)) { + stop_message <- quiet_sites.DR$messages %>% + grep(pattern = "failed|HTTP", x = ., ignore.case = FALSE, value = TRUE) %>% + paste("\n", ., collapse = "") %>% + paste("The WQP request returned a NULL with the following message(s): \n", + ., + collapse = "\n" + ) + + stop(stop_message) + } + + sites.DR <- quiet_sites.DR$result # Get project metadata - projects.DR <- suppressMessages( - dataRetrieval::readWQPdata( - siteid = unique(results.DR$MonitoringLocationIdentifier), - WQPquery, - ignore_attributes = TRUE, - service = "Project" - ) + quiet_projects.DR <- quiet_readWQPdata( + siteid = unique(results.DR$MonitoringLocationIdentifier), + WQPquery, + ignore_attributes = TRUE, + service = "Project" ) + if (is.null(quiet_projects.DR$result)) { + stop_message <- quiet_projects.DR$messages %>% + grep(pattern = "failed|HTTP", x = ., ignore.case = FALSE, value = TRUE) %>% + paste("\n", ., collapse = "") %>% + paste("The WQP request 
returned a NULL with the following message(s): \n", + ., + collapse = "\n" + ) + + stop(stop_message) + } + + projects.DR <- quiet_projects.DR$result + # Join results, sites, projects TADAprofile <- TADA_JoinWQPProfiles( FullPhysChem = results.DR, diff --git a/vignettes/TADAModule1.Rmd b/vignettes/TADAModule1.Rmd index 8154041b..42a6e93b 100644 --- a/vignettes/TADAModule1.Rmd +++ b/vignettes/TADAModule1.Rmd @@ -264,8 +264,9 @@ TADAProfile_tribal <- TADA_DataRetrieval( all.equal(data.frame(TADAProfile_single), data.frame(TADAProfile_tribal)) ``` -Additionally, the aoi_sf argument can be used to provide an sf spatial object -as a query filter. We can match the output of the two short examples above like so: +Additionally, the aoi_sf argument can be used to provide an sf spatial +object as a query filter. We can match the output of the two short +examples above like so: ```{r TADA_DataRetrieval_spatial} TADAProfile_spatial <- TADA_DataRetrieval( @@ -279,11 +280,11 @@ TADAProfile_spatial <- TADA_DataRetrieval( all.equal(data.frame(TADAProfile_single), data.frame(TADAProfile_spatial)) ``` -**Note**: In this example the output data is identical from these three input -methods. However, in some instances this may not be the case. This is because -the tribal_area_type method is based on spatial data and so spatial boundaries -must be taken into account when comparing query results. The same applies when -using aoi_sf results. +**Note**: In this example the output data is identical from these three +input methods. However, in some instances this may not be the case. This +is because the tribal_area_type method is based on spatial data and so +spatial boundaries must be taken into account when comparing query +results. The same applies when using aoi_sf results. ## USGS dataRetrieval @@ -301,10 +302,11 @@ as part of the data retrieval process. 
## Big Data Queries If you need to download a large amount of data from across a large area, -the TADA_DataRetrieval function now handles this automatically. Whereas in the -past there was a second function (TADA_BigDataRetrieval) to do this, the -standard TADA_DataRetrieval function now checks the number of results in each -query and uses similar methods as TADA_BigDataRetrieval when necessary. +the TADA_DataRetrieval function now handles this automatically. Whereas +in the past there was a second function (TADA_BigDataRetrieval) to do +this, the standard TADA_DataRetrieval function now checks the number of +results in each query and uses similar methods as TADA_BigDataRetrieval +when necessary. The function does multiple synchronous data calls to the WQP (waterqualitydata.us). It uses the WQP summary service to limit the @@ -312,14 +314,14 @@ sites downloaded to only those with relevant data. It pulls back data from set number of stations at a time and then joins the data back together to produce a single TADA compatible dataframe as the output. -TADA_DataRetrieval now also prompts the user (when ask = TRUE) to confirm that -they want to download the dataset. As part of this prompt the expected number -of rows of data are provided to help in making the decision. As the downloads -occur, a progress bar is shown as well. +TADA_DataRetrieval now also prompts the user (when ask = TRUE) to +confirm that they want to download the dataset. As part of this prompt +the expected number of rows of data are provided to help in making the +decision. As the downloads occur, a progress bar is shown as well. -See ?TADA_DataRetrieval for more details. WARNING, some of the -examples below can take multiple HOURS to run. The total run time -depends on your query inputs. +See ?TADA_DataRetrieval for more details. WARNING, some of the examples +below can take multiple HOURS to run. The total run time depends on your +query inputs. 
```{r BigdataRetrieval, eval = FALSE, results = 'hide'} # AK_AL_WaterTemp <- TADA_DataRetrieval(startDate = "2000-01-01", endDate = "2022-12-31", characteristicName = "Temperature, water", statecode = c("AK","AL")) From 820f762b5f37d69f5dff2d4126bf585d5962e3e6 Mon Sep 17 00:00:00 2001 From: Matt Brousil <37380883+mbrousil@users.noreply.github.com> Date: Mon, 27 Jan 2025 13:21:51 -0800 Subject: [PATCH 30/35] tigris --- DESCRIPTION | 5 +++-- vignettes/TADAModule1.Rmd | 6 +++++- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index ce8f7442..5e9e3645 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -84,13 +84,14 @@ Suggests: tidyverse, knitr, testthat, - testthis, + testthis, usethis, devtools, pkgdown, spelling, htmltools, - yaml + yaml, + tigris VignetteBuilder: knitr, rmarkdown Language: en-US Config/testthat/edition: 3 diff --git a/vignettes/TADAModule1.Rmd b/vignettes/TADAModule1.Rmd index 42a6e93b..2aa6c07b 100644 --- a/vignettes/TADAModule1.Rmd +++ b/vignettes/TADAModule1.Rmd @@ -134,6 +134,9 @@ geospatial-related filters that are not included in the dataRetrieval - tribe_name_parcel +The TADA_TribalOptions function can be used to narrow down options for +use with tribe_name_parcel. See ?TADA_TribalOptions for more info. + The default TADA_DataRetrieval function automatically runs the **TADA_AutoClean** function. In this example, we will set **TADA_AutoClean = FALSE** and run it as a separate step in the work @@ -266,7 +269,8 @@ all.equal(data.frame(TADAProfile_single), data.frame(TADAProfile_tribal)) Additionally, the aoi_sf argument can be used to provide an sf spatial object as a query filter. 
We can match the output of the two short -examples above like so: +examples above, using tigris::native_areas to acquire Census Bureau +spatial data: ```{r TADA_DataRetrieval_spatial} TADAProfile_spatial <- TADA_DataRetrieval( From 77357beb2f318ddb8976b8c1f7ddcf28995734ee Mon Sep 17 00:00:00 2001 From: Matt Brousil <37380883+mbrousil@users.noreply.github.com> Date: Mon, 27 Jan 2025 14:52:27 -0800 Subject: [PATCH 31/35] Recheck/rebuild --- NAMESPACE | 1 - man/TADA_BigDataRetrieval.Rd | 105 ----------------------------------- man/TADA_DataRetrieval.Rd | 4 +- 3 files changed, 2 insertions(+), 108 deletions(-) delete mode 100644 man/TADA_BigDataRetrieval.Rd diff --git a/NAMESPACE b/NAMESPACE index 0b286c45..5e76429c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -5,7 +5,6 @@ export(TADA_AggregateMeasurements) export(TADA_AnalysisDataFilter) export(TADA_AutoClean) export(TADA_AutoFilter) -export(TADA_BigDataRetrieval) export(TADA_Boxplot) export(TADA_CalculateTotalNP) export(TADA_CharStringRemoveNA) diff --git a/man/TADA_BigDataRetrieval.Rd b/man/TADA_BigDataRetrieval.Rd deleted file mode 100644 index 67de5705..00000000 --- a/man/TADA_BigDataRetrieval.Rd +++ /dev/null @@ -1,105 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/DataDiscoveryRetrieval.R -\name{TADA_BigDataRetrieval} -\alias{TADA_BigDataRetrieval} -\title{Large WQP data pulls using dataRetrieval} -\usage{ -TADA_BigDataRetrieval( - startDate = "null", - endDate = "null", - countrycode = "null", - statecode = "null", - countycode = "null", - huc = "null", - siteid = "null", - siteType = "null", - characteristicName = "null", - characteristicType = "null", - sampleMedia = "null", - organization = "null", - maxrecs = 250000, - applyautoclean = FALSE -) -} -\arguments{ -\item{startDate}{Start Date string in the format YYYY-MM-DD, for example, "2020-01-01"} - -\item{endDate}{End Date string in the format YYYY-MM-DD, for example, "2020-01-01"} - -\item{countrycode}{Code that 
identifies a country or ocean (e.g. countrycode = "CA" for Canada, countrycode = "OA" for Atlantic Ocean). See https://www.waterqualitydata.us/Codes/countrycode for options.} - -\item{statecode}{FIPS state alpha code that identifies a state (e.g. statecode = "DE" for Delaware). See https://www.waterqualitydata.us/Codes/statecode for options.} - -\item{countycode}{FIPS county name. Note that a state code must also be supplied (e.g. statecode = "AL", countycode = "Chilton"). See https://www.waterqualitydata.us/Codes/countycode for options.} - -\item{huc}{A numeric code denoting a hydrologic unit. Example: "04030202". Different size hucs can be entered. See https://epa.maps.arcgis.com/home/item.html?id=796992f4588c401fabec7446ecc7a5a3 for a map with HUCS. Click on a HUC to find the associated code.} - -\item{siteid}{Unique monitoring location identifier.} - -\item{siteType}{Type of waterbody. See https://www.waterqualitydata.us/Codes/sitetype for options.} - -\item{characteristicName}{Name of parameter. See https://www.waterqualitydata.us/Codes/characteristicName for options.} - -\item{characteristicType}{Groups of environmental measurements/parameters. See https://www.waterqualitydata.us/Codes/characteristicType for options.} - -\item{sampleMedia}{Sampling substrate such as water, air, or sediment. See https://www.waterqualitydata.us/Codes/sampleMedia for options.} - -\item{organization}{A string of letters and/or numbers (some additional characters also possible) used to signify an organization with data in the Water Quality Portal. See https://www.waterqualitydata.us/Codes/organization for options.} - -\item{maxrecs}{The maximum number of results queried within one call to dataRetrieval.} - -\item{applyautoclean}{Logical, defaults to FALSE. If TRUE, runs TADA_AutoClean function on the returned data profile.} -} -\value{ -TADA-compatible dataframe -} -\description{ -This function does multiple synchronous data calls to the WQP -(waterqualitydata.us). 
It uses the WQP summary service to limit the amount -downloaded to only relevant data (based on user query), pulls back data for -250000 records at a time, and then joins the data back together to produce a -single TADA compatible dataframe as the output. For large data sets, that can save a lot -of time and ultimately reduce the complexity of subsequent data processing. -Using this function, you will be able to download all data available from all -sites in the contiguous United States available for the time period, -characteristicName, and siteType requested. Computer memory may limit the -size of data frames that your R console will be able to hold in one session. -Function requires a characteristicName, siteType, statecode, huc, or start/ -end date input. The recommendation is to be as specific as you can with your -large data call. The function allows the user to run TADA_AutoClean on the data frame, -but this is not the default as checking large dataframes for exact duplicate -rows can be time consuming and is better performed on its own once the query is -completed. -} -\details{ -Some code for this function was adapted from this USGS Blog (Author: Aliesha Krall) -\href{https://waterdata.usgs.gov/blog/large_sample_pull/}{Large Sample Pull} - -See ?TADA_AutoClean documentation for more information on this optional input. - -Note: TADA_BigDataRetrieval (by leveraging USGS's dataRetrieval), automatically converts -the date times to UTC. It also automatically converts the data to dates, -datetimes, numerics based on a standard algorithm. 
See: ?dataRetrieval::readWQPdata -} -\examples{ -\dontrun{ -# takes approx 3 mins to run -tada1 <- TADA_BigDataRetrieval(startDate = "2019-01-01", endDate = "2021-12-31", characteristicName = "Temperature, water", statecode = c("AK", "AL")) - -# takes approx 21 mins -tada2 <- TADA_BigDataRetrieval(startDate = "2016-10-01", endDate = "2022-09-30", statecode = "UT") - -# takes seconds to run -tada3 <- TADA_BigDataRetrieval(huc = "04030202", characteristicName = "Escherichia coli") - -# takes approx 3 mins to run -tada4 <- TADA_BigDataRetrieval(startDate = "2004-01-01", countrycode = "CA") - -# takes seconds to run -tada5 <- TADA_BigDataRetrieval(startDate = "2018-01-01", statecode = "AL", countycode = "Chilton") - -# takes seconds to run -tada6 <- TADA_BigDataRetrieval(organization = "PUEBLOOFTESUQUE") -} - -} diff --git a/man/TADA_DataRetrieval.Rd b/man/TADA_DataRetrieval.Rd index 9701cf0c..377ca521 100644 --- a/man/TADA_DataRetrieval.Rd +++ b/man/TADA_DataRetrieval.Rd @@ -44,9 +44,9 @@ TADA_DataRetrieval( \item{siteType}{Type of waterbody. See https://www.waterqualitydata.us/Codes/sitetype for options.} -\item{tribal_area_type}{One of the six tribal spatial layers: "Alaska Native Allotments", "Alaska Native Villages", "American Indian Reservations", "Off-reservation Trust Lands", "Oklahoma Tribal Statistical Areas", or "Virginia Federally Recognized Tribes".} +\item{tribal_area_type}{One of four tribal spatial layers: "Alaska Native Allotments", "American Indian Reservations", "Off-reservation Trust Lands", or "Oklahoma Tribal Statistical Areas". More info in TADA_TribalOptions(). Note that "Alaska Native Villages" and "Virginia Federally Recognized Tribes" layers will not return a successful query.} -\item{tribe_name_parcel}{The name of a tribe corresponding to an entry in the TRIBE_NAME field of the specified tribal_area_type. 
OR if the type is Alaska Native Allotments" then the corresponding PARCEL_NO.} +\item{tribe_name_parcel}{The name of one or more tribes corresponding to entries in the TRIBE_NAME field of the specified tribal_area_type. OR if the type is "Alaska Native Allotments" then the corresponding PARCEL_NO. More info in TADA_TribalOptions().} \item{characteristicName}{Name of parameter. See https://www.waterqualitydata.us/Codes/characteristicName for options.} From 8e4b98ad94d8444ae2de577547067b6b574fd36f Mon Sep 17 00:00:00 2001 From: Matt Brousil <37380883+mbrousil@users.noreply.github.com> Date: Mon, 27 Jan 2025 14:54:15 -0800 Subject: [PATCH 32/35] Style --- R/DataDiscoveryRetrieval.R | 381 +++++++++++++++++++------------------ 1 file changed, 191 insertions(+), 190 deletions(-) diff --git a/R/DataDiscoveryRetrieval.R b/R/DataDiscoveryRetrieval.R index 5e0490de..aa93f03a 100644 --- a/R/DataDiscoveryRetrieval.R +++ b/R/DataDiscoveryRetrieval.R @@ -148,24 +148,26 @@ #' # Wyandotte Nation "WNENVDPT_WQX" #' # Pueblo of Pojoaque "PUEBLO_POJOAQUE" #' -#' tada9 <- TADA_DataRetrieval(organization = c( -#' "SFNOES_WQX", -#' "CPNWATER", -#' "DELAWARENATION", -#' "HVTEPA_WQX", -#' "O_MTRIBE_WQX", -#' "FONDULAC_WQX", -#' "SANILDEFONSODECP", -#' "PUEBLO_SANTAANA", -#' "PUEBLOOFTESUQUE", -#' "REDLAKE_WQX", -#' "SCEQ", -#' "CNENVSER", -#' "CHOCNATWQX", -#' "WNENVDPT_WQX", -#' "PUEBLO_POJOAQUE" -#' ), -#' ask = FALSE) +#' tada9 <- TADA_DataRetrieval( +#' organization = c( +#' "SFNOES_WQX", +#' "CPNWATER", +#' "DELAWARENATION", +#' "HVTEPA_WQX", +#' "O_MTRIBE_WQX", +#' "FONDULAC_WQX", +#' "SANILDEFONSODECP", +#' "PUEBLO_SANTAANA", +#' "PUEBLOOFTESUQUE", +#' "REDLAKE_WQX", +#' "SCEQ", +#' "CNENVSER", +#' "CHOCNATWQX", +#' "WNENVDPT_WQX", +#' "PUEBLO_POJOAQUE" +#' ), +#' ask = FALSE +#' ) #' #' # query only NWIS data for a 10 year period in CT #' tada10 <- TADA_DataRetrieval( @@ -206,18 +208,17 @@ TADA_DataRetrieval <- function(startDate = "null", maxrecs = 250000, ask = TRUE, 
applyautoclean = TRUE) { - # Require one tribal area type: if (length(tribal_area_type) > 1) { stop("tribal_area_type must be of length 1.") } - + # Check for incomplete or inconsistent inputs: - + # If both an sf object and tribe information are provided it's unclear what # the priority should be for the query if (!is.null(aoi_sf) & - any((tribal_area_type != "null") | (tribe_name_parcel != "null"))) { + any((tribal_area_type != "null") | (tribe_name_parcel != "null"))) { stop( paste0( "Both sf data and tribal information have been provided. ", @@ -225,17 +226,17 @@ TADA_DataRetrieval <- function(startDate = "null", ) ) } - + # Check for other arguments that indicate location. Function will ignore # these inputs but warn the user if ( # sf object provided (!is.null(aoi_sf) & inherits(aoi_sf, "sf")) & - # with additional location info - any( - (countrycode != "null"), (countycode != "null"), (huc != "null"), - (siteid != "null"), (statecode != "null") - ) + # with additional location info + any( + (countrycode != "null"), (countycode != "null"), (huc != "null"), + (siteid != "null"), (statecode != "null") + ) ) { warning( paste0( @@ -246,11 +247,11 @@ TADA_DataRetrieval <- function(startDate = "null", } else if ( # Tribe info provided (tribal_area_type != "null") & - # with additional location info - any( - (countrycode != "null"), (countycode != "null"), (huc != "null"), - (siteid != "null"), (statecode != "null") - ) + # with additional location info + any( + (countrycode != "null"), (countycode != "null"), (huc != "null"), + (siteid != "null"), (statecode != "null") + ) ) { warning( paste0( @@ -259,7 +260,7 @@ TADA_DataRetrieval <- function(startDate = "null", ) ) } - + # Insufficient tribal info provided: # Type but no name or parcel if ((tribal_area_type != "null") & all(tribe_name_parcel == "null")) { @@ -269,21 +270,21 @@ TADA_DataRetrieval <- function(startDate = "null", if ((tribal_area_type == "null") & all(tribe_name_parcel != "null")) { stop("A 
tribal_area_type is required if tribe_name_parcel is provided.") } - + # Before proceeding make quiet wrappers for dataRetrieval functions for later # use in if/else processes quiet_whatWQPsites <- purrr::quietly(dataRetrieval::whatWQPsites) quiet_whatWQPdata <- purrr::quietly(dataRetrieval::whatWQPdata) quiet_readWQPdata <- purrr::quietly(dataRetrieval::readWQPdata) - + # If an sf object OR tribal info are provided they will be the basis of the query # (The tribal data handling uses sf objects as well) if ((!is.null(aoi_sf) & inherits(aoi_sf, "sf")) | (tribal_area_type != "null")) { # Build the non-sf part of the query: - + # Set query parameters WQPquery <- list() - + # StartDate if (length(startDate) > 1) { if (is.na(suppressWarnings(lubridate::parse_date_time(startDate[1], orders = "ymd")))) { @@ -350,9 +351,9 @@ TADA_DataRetrieval <- function(startDate = "null", } WQPquery <- c(WQPquery, endDate = endDate) } - + # sf AOI prep for query - + # If tribe info is provided then grab the corresponding sf object: if (tribal_area_type != "null") { # Make a reference table for tribal area type + url matching @@ -366,19 +367,19 @@ TADA_DataRetrieval <- function(startDate = "null", "Oklahoma Tribal Statistical Areas", "https://geopub.epa.gov/arcgis/rest/services/EMEF/Tribal/MapServer/4" # , # "Virginia Federally Recognized Tribes", "https://geopub.epa.gov/arcgis/rest/services/EMEF/Tribal/MapServer/5" ) - + # These two layers will not return any data when used for bboxes if (tribal_area_type == "Alaska Native Villages") { stop("Alaska Native Villages data are centroid points, not spatial boundaries.") } else if (tribal_area_type == "Virginia Federally Recognized Tribes") { stop("Federally recognized tribal entities in Virginia do not have any available spatial boundaries.") } - + # These area types allow filtering by TRIBE_NAME (unique within each type) if (tribal_area_type %in% c( "American Indian Reservations", "Off-reservation Trust Lands", - "Oklahoma Tribal 
Statistical Areas" + "Oklahoma Tribal Statistical Areas" ) ) { # Get the relevant url @@ -398,7 +399,7 @@ TADA_DataRetrieval <- function(startDate = "null", . } } - + # Otherwise filter by PARCEL_NO (Note that values in this col are not unique) } else if (tribal_area_type == "Alaska Native Allotments") { aoi_sf <- dplyr::filter( @@ -418,87 +419,87 @@ TADA_DataRetrieval <- function(startDate = "null", stop("Tribal area type or tribal name parcel not recognized. Refer to TADA_TribalOptions() for query options.") } } - + # Check and/or fix geometry aoi_sf <- sf::st_make_valid(aoi_sf) - + # Match CRS if (sf::st_crs(aoi_sf) != 4326) { aoi_sf <- sf::st_transform(aoi_sf, crs = 4326) } - + # Get bbox of the sf object input_bbox <- sf::st_bbox(aoi_sf) - + # Query info on available data within the bbox - + # Try getting WQP info message("Checking what data is available. This may take a moment.") - + # Don't want to print every message that's returned by WQP quiet_bbox_avail <- quiet_whatWQPdata( WQPquery, bBox = c(input_bbox$xmin, input_bbox$ymin, input_bbox$xmax, input_bbox$ymax) ) - + # Alert & stop if an http error was received if (is.null(quiet_bbox_avail$result)) { stop_message <- quiet_bbox_avail$messages %>% grep(pattern = "failed|HTTP", x = ., ignore.case = FALSE, value = TRUE) %>% paste("\n", ., collapse = "") %>% paste("The WQP request returned a NULL with the following message(s): \n", - ., - collapse = "\n" + ., + collapse = "\n" ) - + stop(stop_message) } - + # Use result only bbox_avail <- quiet_bbox_avail$result - + # Check if any sites are within the aoi if ((nrow(bbox_avail) > 0) == FALSE) { stop("No monitoring sites were returned within your area of interest (no data available).") } - + quiet_bbox_sites <- quiet_whatWQPsites( siteid = bbox_avail$MonitoringLocationIdentifier ) - + if (is.null(quiet_bbox_sites$result)) { stop_message <- quiet_bbox_sites$messages %>% grep(pattern = "failed|HTTP", x = ., ignore.case = FALSE, value = TRUE) %>% paste("\n", ., 
collapse = "") %>% paste("The WQP request returned a NULL with the following message(s): \n", - ., - collapse = "\n" + ., + collapse = "\n" ) stop(stop_message) } - + # Reformat returned info as sf bbox_sites_sf <- TADA_MakeSpatial(quiet_bbox_sites$result, crs = 4326) - + # Subset sites to only within shapefile and get IDs clipped_sites_sf <- bbox_sites_sf[aoi_sf, ] - + clipped_site_ids <- clipped_sites_sf$MonitoringLocationIdentifier - + # Check if any sites are within the clip if ((length(clipped_site_ids) > 0) == FALSE) { stop("No monitoring sites were returned within your area of interest (no data available).") } - + record_count <- bbox_avail %>% dplyr::filter(MonitoringLocationIdentifier %in% clipped_site_ids) %>% dplyr::pull(resultCount) %>% sum() - + # Should we proceed with downloads? If ask == TRUE then ask the user. if (ask == TRUE) { user_decision <- ask_user(n_records = record_count) - + # Act on input if (user_decision == "yes") { print("Proceeding with download.") @@ -506,10 +507,10 @@ TADA_DataRetrieval <- function(startDate = "null", stop("Cancelled by user.", call. = FALSE) } } - + # Continue now with site count site_count <- length(clipped_site_ids) - + # Check for either more than 300 sites or more records than max_recs. # If either is true then we'll approach the pull as a "big data" pull if (site_count > 300 | record_count > maxrecs) { @@ -519,7 +520,7 @@ TADA_DataRetrieval <- function(startDate = "null", "If your AOI is a county, state, country, or HUC boundary it would be more efficient to provide a code instead of an sf object." 
) ) - + # Use helper function to download large data volume results.DR <- withCallingHandlers( TADA_BigDataHelper( @@ -532,11 +533,11 @@ TADA_DataRetrieval <- function(startDate = "null", ), message = function(m) message(m$message) ) - - + + rm(bbox_avail, bbox_sites_sf) gc() - + # Check if any results were returned if ((nrow(results.DR) > 0) == FALSE) { print( @@ -554,7 +555,7 @@ TADA_DataRetrieval <- function(startDate = "null", sites.DR <- clipped_sites_sf %>% dplyr::as_tibble() %>% dplyr::select(-geometry) - + # Get project metadata quiet_projects.DR <- quiet_readWQPdata( siteid = clipped_site_ids, @@ -562,21 +563,21 @@ TADA_DataRetrieval <- function(startDate = "null", ignore_attributes = TRUE, service = "Project" ) - + if (is.null(quiet_projects.DR$result)) { stop_message <- quiet_projects.DR$messages %>% grep(pattern = "failed|HTTP", x = ., ignore.case = FALSE, value = TRUE) %>% paste("\n", ., collapse = "") %>% paste("The WQP request returned a NULL with the following message(s): \n", - ., - collapse = "\n" + ., + collapse = "\n" ) - + stop(stop_message) } - + projects.DR <- quiet_projects.DR$result - + # Join results, sites, projects TADAprofile <- TADA_JoinWQPProfiles( FullPhysChem = results.DR, @@ -585,25 +586,25 @@ TADA_DataRetrieval <- function(startDate = "null", ) %>% dplyr::mutate( dplyr::across(tidyselect::everything(), as.character) ) - + # run TADA_AutoClean function if (applyautoclean == TRUE) { print("Data successfully downloaded. Running TADA_AutoClean function.") - + TADAprofile.clean <- TADA_AutoClean(TADAprofile) } else { TADAprofile.clean <- TADAprofile } } - + return(TADAprofile.clean) - + # Doesn't meet "big data" threshold: } else { # Retrieve all 3 profiles print("Downloading WQP query results. 
This may take some time depending upon the query size.") print(WQPquery) - + # Get results results.DR <- suppressMessages( dataRetrieval::readWQPdata( @@ -613,7 +614,7 @@ TADA_DataRetrieval <- function(startDate = "null", ignore_attributes = TRUE ) ) - + # Check if any results were returned if ((nrow(results.DR) > 0) == FALSE) { paste0( @@ -628,7 +629,7 @@ TADA_DataRetrieval <- function(startDate = "null", sites.DR <- clipped_sites_sf %>% dplyr::as_tibble() %>% dplyr::select(-geometry) - + # Get project metadata quiet_projects.DR <- quiet_readWQPdata( siteid = clipped_site_ids, @@ -636,21 +637,21 @@ TADA_DataRetrieval <- function(startDate = "null", ignore_attributes = TRUE, service = "Project" ) - + if (is.null(quiet_projects.DR$result)) { stop_message <- quiet_projects.DR$messages %>% grep(pattern = "failed|HTTP", x = ., ignore.case = FALSE, value = TRUE) %>% paste("\n", ., collapse = "") %>% paste("The WQP request returned a NULL with the following message(s): \n", - ., - collapse = "\n" + ., + collapse = "\n" ) - + stop(stop_message) } - + projects.DR <- quiet_projects.DR$result - + # Join results, sites, projects TADAprofile <- TADA_JoinWQPProfiles( FullPhysChem = results.DR, @@ -659,25 +660,25 @@ TADA_DataRetrieval <- function(startDate = "null", ) %>% dplyr::mutate( dplyr::across(tidyselect::everything(), as.character) ) - + # Run TADA_AutoClean function if (applyautoclean == TRUE) { print("Data successfully downloaded. 
Running TADA_AutoClean function.") - + TADAprofile.clean <- TADA_AutoClean(TADAprofile) } else { TADAprofile.clean <- TADAprofile } } - + return(TADAprofile.clean) } - + # If no sf object provided: } else { # Set query parameters WQPquery <- list() - + if (!"null" %in% statecode) { load(system.file("extdata", "statecodes_df.Rdata", package = "EPATADA")) statecode <- as.character(statecode) @@ -690,13 +691,13 @@ TADA_DataRetrieval <- function(startDate = "null", WQPquery <- c(WQPquery, statecode = list(statecd)) } } - + if (length(huc) > 1) { WQPquery <- c(WQPquery, huc = list(huc)) } else if (huc != "null") { WQPquery <- c(WQPquery, huc = huc) } - + if (length(startDate) > 1) { if (is.na(suppressWarnings(lubridate::parse_date_time(startDate[1], orders = "ymd")))) { stop("Incorrect date format. Please use the format YYYY-MM-DD.") @@ -708,67 +709,67 @@ TADA_DataRetrieval <- function(startDate = "null", } WQPquery <- c(WQPquery, startDate = startDate) } - + if (length(countrycode) > 1) { WQPquery <- c(WQPquery, countrycode = list(countrycode)) } else if (countrycode != "null") { WQPquery <- c(WQPquery, countrycode = countrycode) } - + if (length(countycode) > 1) { WQPquery <- c(WQPquery, countycode = list(countycode)) } else if (countycode != "null") { WQPquery <- c(WQPquery, countycode = countycode) } - + if (length(siteid) > 1) { WQPquery <- c(WQPquery, siteid = list(siteid)) } else if (siteid != "null") { WQPquery <- c(WQPquery, siteid = siteid) } - + if (length(siteType) > 1) { WQPquery <- c(WQPquery, siteType = list(siteType)) } else if (siteType != "null") { WQPquery <- c(WQPquery, siteType = siteType) } - + if (length(characteristicName) > 1) { WQPquery <- c(WQPquery, characteristicName = list(characteristicName)) } else if (characteristicName != "null") { WQPquery <- c(WQPquery, characteristicName = characteristicName) } - + if (length(characteristicType) > 1) { WQPquery <- c(WQPquery, characteristicType = list(characteristicType)) } else if 
(characteristicType != "null") { WQPquery <- c(WQPquery, characteristicType = characteristicType) } - + if (length(sampleMedia) > 1) { WQPquery <- c(WQPquery, sampleMedia = list(sampleMedia)) } else if (sampleMedia != "null") { WQPquery <- c(WQPquery, sampleMedia = sampleMedia) } - + if (length(project) > 1) { WQPquery <- c(WQPquery, project = list(project)) } else if (project != "null") { WQPquery <- c(WQPquery, project = project) } - + if (length(providers) > 1) { WQPquery <- c(WQPquery, providers = list(providers)) } else if (providers != "null") { WQPquery <- c(WQPquery, providers = providers) } - + if (length(organization) > 1) { WQPquery <- c(WQPquery, organization = list(organization)) } else if (organization != "null") { WQPquery <- c(WQPquery, organization = organization) } - + if (length(endDate) > 1) { if (is.na(suppressWarnings(lubridate::parse_date_time(endDate[1], orders = "ymd")))) { stop("Incorrect date format. Please use the format YYYY-MM-DD.") @@ -780,37 +781,37 @@ TADA_DataRetrieval <- function(startDate = "null", } WQPquery <- c(WQPquery, endDate = endDate) } - + # Query info on available data message("Checking what data is available. This may take a moment.") - + # Don't want to print every message that's returned by WQP quiet_query_avail <- quiet_whatWQPdata(WQPquery) - + if (is.null(quiet_query_avail$result)) { stop_message <- quiet_query_avail$messages %>% grep(pattern = "failed|HTTP", x = ., ignore.case = FALSE, value = TRUE) %>% paste("\n", ., collapse = "") %>% paste("The WQP request returned a NULL with the following message(s): \n", - ., - collapse = "\n" + ., + collapse = "\n" ) - + stop(stop_message) } - + query_avail <- quiet_query_avail$result - + site_count <- length(query_avail$MonitoringLocationIdentifier) - + record_count <- query_avail %>% dplyr::pull(resultCount) %>% sum() - + # Should we proceed with downloads? If ask == TRUE then ask the user. 
if (ask == TRUE) { user_decision <- ask_user(n_records = record_count) - + # Act on input if (user_decision == "yes") { print("Proceeding with download.") @@ -818,14 +819,14 @@ TADA_DataRetrieval <- function(startDate = "null", stop("Cancelled by user.", call. = FALSE) } } - + # Check for either more than 300 sites or more records than max_recs. # If either is true then we'll approach the pull as a "big data" pull if (site_count > 300 | record_count > maxrecs) { message( "The number of sites and/or records matched by the query terms is large, so the download may take some time." ) - + # Use helper function to download large data volume results.DR <- suppressMessages( TADA_BigDataHelper( @@ -836,27 +837,27 @@ TADA_DataRetrieval <- function(startDate = "null", maxsites = 300 ) ) - + rm(query_avail) gc() - + # Get site metadata quiet_sites.DR <- quiet_whatWQPsites(siteid = unique(results.DR$MonitoringLocationIdentifier)) - + if (is.null(quiet_sites.DR$result)) { stop_message <- quiet_sites.DR$messages %>% grep(pattern = "failed|HTTP", x = ., ignore.case = FALSE, value = TRUE) %>% paste("\n", ., collapse = "") %>% paste("The WQP request returned a NULL with the following message(s): \n", - ., - collapse = "\n" + ., + collapse = "\n" ) - + stop(stop_message) } - + sites.DR <- quiet_sites.DR$result - + # Get project metadata quiet_projects.DR <- quiet_readWQPdata( siteid = unique(results.DR$MonitoringLocationIdentifier), @@ -864,21 +865,21 @@ TADA_DataRetrieval <- function(startDate = "null", ignore_attributes = TRUE, service = "Project" ) - + if (is.null(quiet_projects.DR$result)) { stop_message <- quiet_projects.DR$messages %>% grep(pattern = "failed|HTTP", x = ., ignore.case = FALSE, value = TRUE) %>% paste("\n", ., collapse = "") %>% paste("The WQP request returned a NULL with the following message(s): \n", - ., - collapse = "\n" + ., + collapse = "\n" ) - + stop(stop_message) } - + projects.DR <- quiet_projects.DR$result - + # Join results, sites, projects 
TADAprofile <- TADA_JoinWQPProfiles( FullPhysChem = results.DR, @@ -887,18 +888,18 @@ TADA_DataRetrieval <- function(startDate = "null", ) %>% dplyr::mutate( dplyr::across(tidyselect::everything(), as.character) ) - + # Run TADA_AutoClean function if (applyautoclean == TRUE) { print("Data successfully downloaded. Running TADA_AutoClean function.") - + TADAprofile.clean <- TADA_AutoClean(TADAprofile) } else { TADAprofile.clean <- TADAprofile } - + return(TADAprofile.clean) - + # If not a "big data" pull: } else { # Retrieve all 3 profiles @@ -906,25 +907,25 @@ TADA_DataRetrieval <- function(startDate = "null", print(WQPquery) results.DR <- suppressMessages( dataRetrieval::readWQPdata(WQPquery, - dataProfile = "resultPhysChem", - ignore_attributes = TRUE + dataProfile = "resultPhysChem", + ignore_attributes = TRUE ) ) - + # Check if any results are available if ((nrow(results.DR) > 0) == FALSE) { print("Returning empty results dataframe: Your WQP query returned no results (no data available). Try a different query. Removing some of your query filters OR broadening your search area may help.") TADAprofile.clean <- results.DR } else { sites.DR <- suppressMessages(dataRetrieval::whatWQPsites(WQPquery)) - + projects.DR <- suppressMessages( dataRetrieval::readWQPdata(WQPquery, - ignore_attributes = TRUE, - service = "Project" + ignore_attributes = TRUE, + service = "Project" ) ) - + TADAprofile <- TADA_JoinWQPProfiles( FullPhysChem = results.DR, Sites = sites.DR, @@ -932,17 +933,17 @@ TADA_DataRetrieval <- function(startDate = "null", ) %>% dplyr::mutate( dplyr::across(tidyselect::everything(), as.character) ) - + # run TADA_AutoClean function if (applyautoclean == TRUE) { print("Data successfully downloaded. 
Running TADA_AutoClean function.") - + TADAprofile.clean <- TADA_AutoClean(TADAprofile) } else { TADAprofile.clean <- TADAprofile } } - + return(TADAprofile.clean) } } @@ -997,12 +998,12 @@ TADA_TribalOptions <- function(tribal_area_type, return_sf = FALSE) { "Oklahoma Tribal Statistical Areas", "https://geopub.epa.gov/arcgis/rest/services/EMEF/Tribal/MapServer/4", "Virginia Federally Recognized Tribes", "https://geopub.epa.gov/arcgis/rest/services/EMEF/Tribal/MapServer/5" ) - + # Confirm usable string provided if (!(tribal_area_type %in% map_service_urls$tribal_area)) { stop("tribal_area_type must match one of the six tribal spatial layer names.") } - + # Query Map Service tribal_area_sf <- dplyr::filter( map_service_urls, @@ -1012,7 +1013,7 @@ TADA_TribalOptions <- function(tribal_area_type, return_sf = FALSE) { # Return sf arcgislayers::arc_select() %>% sf::st_make_valid() - + # Convert to df if needed, export if (return_sf == FALSE) { return( @@ -1078,25 +1079,25 @@ TADA_ReadWQPWebServices <- function(webservice) { # read in csv from WQP web service if (grepl("zip=yes", webservice)) { webservice <- stringr::str_replace(webservice, "zip=yes", "zip=no") - + # download data webservice <- data.table::fread(toString(webservice)) - + # if input df was not downloaded using USGS's dataRetrieval, then the # column names will include / separators instead of . and TADA uses . # (e.g. ResultMeasure/MeasureUnitCode vs. ResultMeasure.MeasureUnitCode) colnames(webservice) <- gsub("/", ".", colnames(webservice)) - + return(webservice) } else { # download data webservice <- data.table::fread(toString(webservice)) - + # if input df was not downloaded using USGS's dataRetrieval, then the # column names will include / separators instead of . and TADA uses . # (e.g. ResultMeasure/MeasureUnitCode vs. 
ResultMeasure.MeasureUnitCode) colnames(webservice) <- gsub("/", ".", colnames(webservice)) - + return(webservice) } } @@ -1123,15 +1124,15 @@ TADA_BigDataHelper <- function(record_summary, WQPquery, maxrecs = 250000, maxsi dplyr::summarise(tot_n = sum(resultCount)) %>% dplyr::filter(tot_n > 0) %>% dplyr::arrange(tot_n) - + # Sites with less than/equal to maxrecs smallsites <- tot_sites %>% dplyr::filter(tot_n <= maxrecs) # Sites with more than maxrecs bigsites <- tot_sites %>% dplyr::filter(tot_n > maxrecs) - + df_small <- data.frame() df_big <- data.frame() - + # Work with small sites first: # Build download groups. Total record count limited to value of maxrecs. # Number of sites per download group limited to 300. @@ -1142,7 +1143,7 @@ TADA_BigDataHelper <- function(record_summary, WQPquery, maxrecs = 250000, maxsi threshold = maxrecs, maxgroupsize = 300 )) - + # Status update to user print( paste0( @@ -1151,9 +1152,9 @@ TADA_BigDataHelper <- function(record_summary, WQPquery, maxrecs = 250000, maxsi " results by grouping them together." ) ) - + small_prog_bar <- txtProgressBar(min = 0, max = sum(smallsites$tot_n), style = 3) - + # Download the data for each group for (i in 1:max(smallsitesgrp$group)) { small_site_chunk <- subset( @@ -1170,22 +1171,22 @@ TADA_BigDataHelper <- function(record_summary, WQPquery, maxrecs = 250000, maxsi ) ) %>% dplyr::mutate(dplyr::across(everything(), as.character)) - + # If data is returned, stack with what's already been retrieved if (dim(results_small)[1] > 0) { df_small <- dplyr::bind_rows(df_small, results_small) } - + # Update progress setTxtProgressBar(pb = small_prog_bar, value = nrow(df_small)) } # Close progress bar when complete close(small_prog_bar) - + rm(smallsites, smallsitesgrp) gc() } - + # Large sites (>= maxrecs) next: if (dim(bigsites)[1] > 0) { print( @@ -1195,12 +1196,12 @@ TADA_BigDataHelper <- function(record_summary, WQPquery, maxrecs = 250000, maxsi " results, chunking queries by site." 
) ) - + big_prog_bar <- txtProgressBar(min = 0, max = sum(bigsites$tot_n), style = 3) - + # Unique site IDs bsitesvec <- unique(bigsites$MonitoringLocationIdentifier) - + # For each site for (i in 1:length(bsitesvec)) { # Download each site's data individually @@ -1213,7 +1214,7 @@ TADA_BigDataHelper <- function(record_summary, WQPquery, maxrecs = 250000, maxsi ) ) %>% dplyr::mutate(dplyr::across(everything(), as.character)) - + if (dim(results_big)[1] > 0) { df_big <- dplyr::bind_rows(df_big, results_big) } @@ -1222,14 +1223,14 @@ TADA_BigDataHelper <- function(record_summary, WQPquery, maxrecs = 250000, maxsi } # Close progress bar when complete close(big_prog_bar) - + rm(bigsites) gc() } - - + + df_out <- dplyr::bind_rows(df_small, df_big) - + return(df_out) } @@ -1265,20 +1266,20 @@ TADA_JoinWQPProfiles <- function(FullPhysChem = "null", Sites = "null", Projects = "null") { FullPhysChem.df <- FullPhysChem - + Sites.df <- Sites - + Projects.df <- Projects - + # Join station data to full phys/chem (FullPhysChem.df) if (length(Sites.df > 1)) { if (nrow(Sites.df) > 0) { join1 <- FullPhysChem.df %>% # join stations to results dplyr::left_join(Sites.df, - by = "MonitoringLocationIdentifier", - multiple = "all", - relationship = "many-to-many" + by = "MonitoringLocationIdentifier", + multiple = "all", + relationship = "many-to-many" ) %>% # remove ".x" suffix from column names dplyr::rename_at(dplyr::vars(dplyr::ends_with(".x")), ~ stringr::str_replace(., "\\..$", "")) %>% @@ -1290,8 +1291,8 @@ TADA_JoinWQPProfiles <- function(FullPhysChem = "null", } else { join1 <- FullPhysChem.df } - - + + # Add QAPP columns from project if (length(Projects.df) > 1) { if (nrow(Projects.df) > 0) { @@ -1317,7 +1318,7 @@ TADA_JoinWQPProfiles <- function(FullPhysChem = "null", } else { join2 <- join1 } - + return(join2) } @@ -1338,7 +1339,7 @@ ask_user <- function(n_records) { "Would you like to continue with the download? 
[yes/no] ", sep = "" ) - + # Ask user if they want to continue & check for valid response while (TRUE) { user_input <- readline(prompt = user_prompt) @@ -1375,9 +1376,9 @@ make_groups <- function(x, maxrecs) { i <- i + 1 groupings <- plyr::rbind.fill(groupings, group) } - + x$group <- i - + groupings <- plyr::rbind.fill(groupings, x) } return(groupings) From fb1fc4977939a6eecf7b2a4aa3a89b17f8b5998b Mon Sep 17 00:00:00 2001 From: Matt Brousil <37380883+mbrousil@users.noreply.github.com> Date: Mon, 27 Jan 2025 17:40:08 -0800 Subject: [PATCH 33/35] Update R/DataDiscoveryRetrieval.R Co-authored-by: B Steele <32140074+steeleb@users.noreply.github.com> --- R/DataDiscoveryRetrieval.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/DataDiscoveryRetrieval.R b/R/DataDiscoveryRetrieval.R index aa93f03a..b88ccc63 100644 --- a/R/DataDiscoveryRetrieval.R +++ b/R/DataDiscoveryRetrieval.R @@ -434,7 +434,7 @@ TADA_DataRetrieval <- function(startDate = "null", # Query info on available data within the bbox # Try getting WQP info - message("Checking what data is available. This may take a moment.") + message("Checking for available data. 
This may take a moment.") # Don't want to print every message that's returned by WQP quiet_bbox_avail <- quiet_whatWQPdata( From 131073633a1a9272558e8d6742aaf1554060e200 Mon Sep 17 00:00:00 2001 From: Matt Brousil <37380883+mbrousil@users.noreply.github.com> Date: Mon, 27 Jan 2025 17:41:21 -0800 Subject: [PATCH 34/35] Apply suggestions from code review Co-authored-by: B Steele <32140074+steeleb@users.noreply.github.com> --- R/DataDiscoveryRetrieval.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/DataDiscoveryRetrieval.R b/R/DataDiscoveryRetrieval.R index b88ccc63..74401fec 100644 --- a/R/DataDiscoveryRetrieval.R +++ b/R/DataDiscoveryRetrieval.R @@ -449,6 +449,7 @@ TADA_DataRetrieval <- function(startDate = "null", paste("\n", ., collapse = "") %>% paste("The WQP request returned a NULL with the following message(s): \n", ., + "The bounding box is likely too large for this process. Reduce your area of interest and try again.", collapse = "\n" ) From 7602fac4dcb9439b308ee2cbcdeebc24469bfa20 Mon Sep 17 00:00:00 2001 From: Matt Brousil <37380883+mbrousil@users.noreply.github.com> Date: Mon, 27 Jan 2025 17:59:52 -0800 Subject: [PATCH 35/35] Update DataDiscoveryRetrieval.R --- R/DataDiscoveryRetrieval.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/DataDiscoveryRetrieval.R b/R/DataDiscoveryRetrieval.R index 74401fec..2b6288d9 100644 --- a/R/DataDiscoveryRetrieval.R +++ b/R/DataDiscoveryRetrieval.R @@ -449,7 +449,7 @@ TADA_DataRetrieval <- function(startDate = "null", paste("\n", ., collapse = "") %>% paste("The WQP request returned a NULL with the following message(s): \n", ., - "The bounding box is likely too large for this process. Reduce your area of interest and try again.", + "The bounding box may be too large for this process. Reduce your area of interest and try again.", collapse = "\n" )