Skip to content

Commit

Permalink
Merge branch 'develop' into fds-1852-upload-file-annotations
Browse files Browse the repository at this point in the history
  • Loading branch information
lakikowolfe authored May 14, 2024
2 parents 2185105 + a2ca2d9 commit 4de36bd
Show file tree
Hide file tree
Showing 24 changed files with 797 additions and 388 deletions.
2 changes: 2 additions & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
^\.pre-commit-config\.yaml$
^renv$
^renv\.lock$
^.venv
^schematic$
14 changes: 13 additions & 1 deletion .github/workflows/docker_build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,4 +54,16 @@ jobs:
build-args: |
DCA_VERSION=${{ env.DCA_VERSION }}
- name: Lowercase image name for trivy
id: string
uses: ASzc/change-string-case-action@v6
with:
string: ${{ env.IMAGE_PATH }}

- name: Run Trivy vulnerability scanner
uses: aquasecurity/trivy-action@master
with:
image-ref: '${{ steps.string.outputs.lowercase }}:${{ steps.meta.outputs.version }}'
format: 'table'
ignore-unfixed: true
severity: 'CRITICAL,HIGH'
169 changes: 102 additions & 67 deletions R/schematic_rest_api.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,33 +14,29 @@ check_success <- function(x){
#' @param url URI of API endpoint
#' @param access_token Synapse PAT
#' @param asset_view ID of view listing all project data assets
#' @param dataset_id the parent ID of the manifest
#' @param manifest_id the parent ID of the manifest
#' @param as_json if TRUE, request the manifest in JSON format
#' @returns the manifest, parsed from the JSON response body
#' @export
manifest_download <- function(url = "http://localhost:3001/v1/manifest/download", access_token, asset_view, dataset_id, as_json=TRUE, new_manifest_name=NULL) {
request <- httr::GET(
url = url,
httr::add_headers(Authorization = sprintf("Bearer %s", access_token)),
query = list(
asset_view = asset_view,
dataset_id = dataset_id,
manifest_download <- function(url = "http://localhost:3001/v1/manifest/download", access_token, manifest_id, as_json=TRUE, new_manifest_name=NULL) {

req <- httr2::request(url) |>
httr2::req_retry(
max_tries = 3,
is_transient = \(r) httr2::resp_status(r) %in% c(429, 500, 503, 403)
) |>
httr2::req_error(is_error = \(r) FALSE)
resp <- req |>
httr2::req_headers(Authorization = sprintf("Bearer %s", access_token)) |>
httr2::req_url_query(
manifest_id = manifest_id,
as_json = as_json,
new_manifest_name = new_manifest_name
)
)

check_success(request)
response <- httr::content(request, type = "application/json")

# Output can have many NULL values which get dropped or cause errors. Set them to NA
nullToNA <- function(x) {
x[sapply(x, is.null)] <- NA
return(x)
}
df <- do.call(rbind, lapply(response, rbind))
nullToNA(df)

) |>
httr2::req_perform()
resp |> httr2::resp_body_string() |>
(function(d) gsub('NaN', '"NA"', x = d))() |>
jsonlite::fromJSON()
}

#' schematic rest api to generate manifest
Expand Down Expand Up @@ -132,6 +128,7 @@ manifest_validate <- function(url="http://localhost:3001/v1/model/validate",
project_scope = NULL,
access_token,
asset_view = NULL,
json_str = NULL,
data_model_labels = "class_label") {

flattenbody <- function(x) {
Expand All @@ -153,35 +150,68 @@ manifest_validate <- function(url="http://localhost:3001/v1/model/validate",
}, names(x), x, USE.NAMES = FALSE, SIMPLIFY = FALSE))
}

req <- httr::POST(url,
httr::add_headers(Authorization = sprintf("Bearer %s", access_token)),
query=flattenbody(list(
schema_url=schema_url,
data_type=data_type,
restrict_rules=restrict_rules,
project_scope = project_scope,
asset_view = asset_view,
data_model_labels = data_model_labels)),
body=list(file_name=httr::upload_file(file_name))
)
if (all(is.null(json_str), is.null(file_name))) {
stop("Must provide either a file to upload or a json")
}

# Format server error in a way validationResult can handle
if (httr::http_status(req)$category == "Server error") {
return(
list(
list(
"errors" = list(
Row = NA, Column = NA, Value = NA,
Error = sprintf("Cannot validate manifest: %s",
httr::http_status(req)$message)
)
)
)
)
if (is.null(json_str)) {
reqs <- httr2::request(url) |>
httr2::req_retry(
max_tries = 3,
is_transient = \(r) httr2::resp_status(r) %in% c(429, 500, 503, 504, 403)
) |>
httr2::req_throttle(1/2) |>
httr2::req_error(is_error = \(reqs) FALSE)
resp <- reqs |>
httr2::req_headers(Authorization = sprintf("Bearer %s", access_token)) |>
httr2::req_url_query(
schema_url=schema_url,
data_type=data_type,
restrict_rules=restrict_rules,
project_scope = project_scope,
data_model_labels = data_model_labels,
asset_view = asset_view
) |>
httr2::req_body_multipart(file_name=curl::form_file(file_name)) |>
httr2::req_perform()
} else {
req <- httr2::request(url) |>
httr2::req_throttle(1)
resp <- req |>
httr2::req_headers(Authorization = sprintf("Bearer %s", access_token)) |>
httr2::req_url_query(
schema_url=schema_url,
data_type=data_type,
restrict_rules=restrict_rules,
project_scope = project_scope,
asset_view = asset_view,
data_model_labels = data_model_labels,
json_str = json_str
) |>
#httr2::req_retry(
# max_tries = 3,
# is_transient = \(resp) httr2::resp_status(resp) %in% c(429, 500, 503, 504)
#) |>
#httr2::req_error(is_error = \(resp) FALSE) |>
httr2::req_perform()
}
check_success(req)
annotation_status <- httr::content(req)
annotation_status

# Format server error in a way validationResult can handle
# if (httr2::resp_is_error(resp)) {
# return(
# list(
# list(
# "errors" = list(
# Row = NA, Column = NA, Value = NA,
# Error = sprintf("Cannot validate manifest: %s",
# httr2::resp_status_desc(resp)
# )
# )
# )
# )
# )
# }
httr2::resp_body_json(resp)
}


Expand Down Expand Up @@ -261,23 +291,28 @@ model_component_requirements <- function(url="http://localhost:3001/v1/model/com
as_graph = FALSE,
data_model_labels = "class_label") {

req <- httr::GET(url,
query = list(
schema_url = schema_url,
source_component = source_component,
as_graph = as_graph,
data_model_labels = data_model_labels
))

check_success(req)
cont <- httr::content(req)

if (inherits(cont, "xml_document")){
err_msg <- xml2::xml_text(xml2::xml_child(cont, "head/title"))
stop(sprintf("%s", err_msg))
reqs <- httr2::request(url) |>
httr2::req_retry(
max_tries = 5,
is_transient = \(r) httr2::resp_status(r) %in% c(429, 500, 503)
) |>
httr2::req_error(is_error = \(r) FALSE)
resp <- reqs |>
httr2::req_url_query(
schema_url = schema_url,
source_component = source_component,
data_model_labels = data_model_labels,
as_graph = as_graph
) |>
#httr2::req_retry(max_tries = 3) |>
httr2::req_perform()
if (httr2::resp_is_error(resp)) {
warning(sprintf("model/component-requirement failed for %s. returning empty list. %s",
source_component, httr2::resp_body_json(resp)$title))
return(list())
}

cont
resp |>
httr2::resp_body_json()

}

Expand All @@ -302,7 +337,7 @@ storage_project_datasets <- function(url="http://localhost:3001/v1/storage/proje
asset_view=asset_view,
project_id=project_id)
)

check_success(req)
httr::content(req)
}
Expand Down Expand Up @@ -376,7 +411,7 @@ get_asset_view_table <- function(url="http://localhost:3001/v1/storage/assets/ta
if (return_type=="json") {
return(list2DF(fromJSON(httr::content(req))))
} else {
csv <- readr::read_csv(httr::content(req))
csv <- readr::read_csv(httr::content(req), show_col_types = FALSE)
return(csv)
}

Expand Down
66 changes: 49 additions & 17 deletions R/synapse_rest_api.R
Original file line number Diff line number Diff line change
Expand Up @@ -54,21 +54,19 @@ synapse_is_certified <- function(url="https://repo-prod.prod.sagebase.org/repo/v
#' @param auth Synapse PAT
#'
#' @export
synapse_get <- function(url = "https://repo-prod.prod.sagebase.org/repo/v1/entity/",
synapse_get <- function(url = "https://repo-prod.prod.sagebase.org/repo/v1/entity",
id, auth) {

if (is.null(id)) stop("id cannot be NULL")
req_url <- file.path(url, id)
req <- httr::GET(req_url,
httr::add_headers(Authorization=paste0("Bearer ", auth)))

# Send error if unsuccessful query
status <- httr::http_status(req)
if (status$category != "Success") stop(status$message)

cont <- httr::content(req)
dplyr::bind_rows(cont)

req <- httr2::request(file.path(url, id))
resp <- req |>
httr2::req_retry(
max_tries = 5,
is_transient = \(resp) httr2::resp_status(resp) %in% c(429, 500, 503, 403)
) |>
httr2::req_headers(Authorization = sprintf("Bearer %s", auth)) |>
httr2::req_perform()
resp |> httr2::resp_body_json()
}


Expand Down Expand Up @@ -216,9 +214,16 @@ synapse_table_query <- function(id, auth, query, partMask=0x7F) {
#' @param auth Synapse token
synapse_table_get <- function(id, async_token, auth) {
url <- file.path("https://repo-prod.prod.sagebase.org/repo/v1/entity", id,"table/query/async/get", async_token)
req <- httr::GET(url = url,
httr::add_headers(Authorization=paste0("Bearer ", auth)))
httr::content(req)
request <- httr2::request(url)
response <- request |>
httr2::req_retry(
max_tries = 5,
is_transient = \(r) httr2::resp_status(r) %in% c(429, 500, 503, 202, 403)
) |>
httr2::req_headers(Authorization = sprintf("Bearer %s", auth)) |>
httr2::req_perform()
httr2::resp_body_json(response)

}

#' @title Get column names from a Synapse table
Expand All @@ -245,7 +250,6 @@ synapse_storage_projects <- function(id, auth, select_cols = c("id", "name", "pa
select_cols_format <- paste(select_cols, collapse = ", ")
query <- sprintf("select distinct %s from %s", select_cols_format, id)
request <- synapse_table_query(id, auth, query, partMask = 0x1)
Sys.sleep(1)
response <- synapse_table_get(id, request$token, auth)

setNames(
Expand Down Expand Up @@ -278,6 +282,34 @@ synapse_download_file_handle <- function(dataFileHandleId, id, auth, filepath=NU
download_url <- httr::content(request)
destfile <- ifelse(is.null(filepath), tempfile(), filepath)
download.file(download_url, destfile)
if (is.null(filepath)) readr::read_csv(destfile)
if (is.null(filepath)) readr::read_csv(destfile, show_col_types = FALSE)

}

#' @title Download the storage manifest records from an asset view table
#'
#' @description Queries the asset view for rows whose `name` starts with
#' `synapse_storage_manifest_` and reshapes the query result into a tibble.
#'
#' @param id Synapse ID of the asset view table to query
#' @param auth Synapse token
#' @returns A tibble with one row per query result row and one character
#'   column per column model returned by the query. NULL cells become `NA`.
synapse_get_manifests_in_asset_view <- function(id, auth) {
  # `|` is declared as the LIKE escape character so that the underscores in
  # the pattern match literally instead of acting as one-character wildcards.
  query <- paste(
    "select * from",
    id,
    "where name like 'synapse|_storage|_manifest|_%' escape '|'"
  )
  # partMask 0x11 gets queryResults and column names
  request <- synapse_table_query(
    id = id,
    auth = auth,
    query = query,
    partMask = 0x11
  )
  response <- synapse_table_get(
    id = id,
    async_token = request$token,
    auth = auth
  )

  column_names <- vapply(
    response$columnModels,
    function(x) x$name,
    character(1L)
  )
  n_cols <- length(column_names)

  # Convert one result row into a character vector. NULL cells map to
  # NA_character_ (not logical NA) so a row made up only of NULLs still
  # satisfies the character FUN.VALUE check below.
  row_to_character <- function(row) {
    vapply(
      row$values,
      function(v) if (is.null(v)) NA_character_ else as.character(v),
      character(1L),
      USE.NAMES = FALSE
    )
  }

  # vapply() also validates that every row has exactly one cell per column.
  cells <- vapply(
    response$queryResult$queryResults$rows,
    row_to_character,
    character(n_cols)
  )

  # vapply() returns a plain vector when there is a single column, so force
  # the columns-by-rows layout explicitly before transposing to rows-by-columns.
  cell_matrix <- t(matrix(cells, nrow = n_cols))
  colnames(cell_matrix) <- column_names

  # Names are already set, so skip tibble's compatibility name repair.
  tibble::as_tibble(cell_matrix, .name_repair = "minimal")
}
35 changes: 28 additions & 7 deletions R/template_config.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,22 @@ get_display_names <- function(qlist) {
}

#' @export
create_template_config <- function(data_model, include_schemas = NULL, exclude_schemas = NULL) {
create_template_config <- function(
data_model,
include_schemas = NULL,
exclude_schemas = NULL,
data_model_labels = "class_label") {

if (!is.null(include_schemas) && !is.null(exclude_schemas)) stop("include_schemas and exclude_schemas cannot both have values")
edges <- graph_by_edge_type(schema_url = data_model)
edges <- graph_by_edge_type(schema_url = data_model, data_model_labels = data_model_labels)
schema_names <- format_edge_type(edges)
nl <- setNames(as.list(schema_names$schema_name), rep("node_list", length(schema_names$schema_name)))
dnames <- get_display_names(c(schema_url = data_model, nl)) |> httr::content()
dnames <- get_display_names(
c(schema_url = data_model,
nl,
data_model_labels=data_model_labels)
) |>
httr::content()
config <- data.frame(display_name = unlist(dnames), schema_name = unlist(nl)) |>
dplyr::left_join(schema_names, by = "schema_name") |>
dplyr::mutate(type = ifelse(file_based, "file", "record")) |>
Expand All @@ -44,8 +54,13 @@ create_template_config <- function(data_model, include_schemas = NULL, exclude_s
}

#' @export
create_dca_template_config <- function(data_model, include_schemas = NULL, exclude_schemas = NULL) {
df <- create_template_config(data_model, include_schemas, exclude_schemas)
create_dca_template_config <- function(
data_model,
include_schemas = NULL,
exclude_schemas = NULL,
data_model_labels = "class_label") {

df <- create_template_config(data_model, include_schemas, exclude_schemas, data_model_labels)
schematic_version <- httr::GET("https://schematic-dev.api.sagebionetworks.org/v1/version") |>
httr::content()
list(
Expand All @@ -57,7 +72,13 @@ create_dca_template_config <- function(data_model, include_schemas = NULL, exclu

#' @export
#' @description Create a DCA template config and write it to a JSON file
write_dca_template_config <- function(data_model, file, include_schemas = NULL, exclude_schemas = NULL) {
df <- create_dca_template_config(data_model, include_schemas, exclude_schemas)
write_dca_template_config <- function(
data_model,
file,
include_schemas = NULL,
exclude_schemas = NULL,
data_model_labels = "class_label") {

df <- create_dca_template_config(data_model, include_schemas, exclude_schemas, data_model_labels)
jsonlite::write_json(df, file, pretty = TRUE, auto_unbox = TRUE)
}
Loading

0 comments on commit 4de36bd

Please sign in to comment.