Skip to content

Commit

Permalink
Merge pull request #445 from ncbi/release/16.40.0
Browse files Browse the repository at this point in the history
Release updated docs for 16.40.0
  • Loading branch information
BradHolmes authored Jan 8, 2025
2 parents 57b127d + df6f277 commit 8a6e7b2
Show file tree
Hide file tree
Showing 119 changed files with 23,377 additions and 0 deletions.
17 changes: 17 additions & 0 deletions client/.bazelrc
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
build --workspace_status_command=workspace_status.sh

###
# Correctness guarantees
###
test --incompatible_exclusive_test_sandboxed
build --incompatible_strict_action_env
# --action_env expects a variable NAME (or NAME=VALUE). bazelrc files do not
# expand shell variables, so "$LD_LIBRARY_PATH" would be read as the literal
# name of a variable that never exists; pass the bare name so the caller's
# LD_LIBRARY_PATH is forwarded to build actions.
build --action_env=LD_LIBRARY_PATH

###
# Convenience
###
build --sandbox_fake_hostname
build --sandbox_fake_username
build --show_timestamps

build --java_runtime_version=remotejdk_21
2 changes: 2 additions & 0 deletions client/.envrc
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
export USE_BAZEL_VERSION=7.x
export GCC_VERSION=13.2.0
4 changes: 4 additions & 0 deletions client/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
bazel-bin
bazel-client
bazel-out
bazel-testlogs
11 changes: 11 additions & 0 deletions client/BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
load("@gazelle//:def.bzl", "gazelle")

# gazelle:resolve go datasets/openapi/v2 //openapi:golib.v2
gazelle(name = "gazelle")

# Writes the output of `git describe --always --dirty` to version.txt; `tee`
# also echoes the same text to stdout. The repository's .git directory is
# declared as the rule's only source and handed to git through --git-dir.
genrule(
name = "version",
srcs = [".git"],
outs = ["version.txt"],
cmd_bash = "git --git-dir=$(location :.git) describe --always --dirty |tee $@",
)
49 changes: 49 additions & 0 deletions client/MODULE.bazel
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
module(
name = "datasets_client",
)

bazel_dep(name = "rules_go", version = "0.50.1")
bazel_dep(name = "gazelle", version = "0.40.0")

##########################
## Golang Configuration ##
##########################

# Download and register a pinned Go SDK (the host's Go installation is not used).
go_sdk = use_extension("@rules_go//go:extensions.bzl", "go_sdk")
go_sdk.download(version = "1.23.4")

go_deps = use_extension("@gazelle//:extensions.bzl", "go_deps")
go_deps.from_file(go_work = "//go:go.work")

# All *direct* Go dependencies of the module have to be listed explicitly.
use_repo(
go_deps,
"com_github_antihax_optional",
"com_github_araddon_dateparse",
"com_github_docker_go_units",
"com_github_gosuri_uiprogress",
"com_github_hashicorp_go_cleanhttp",
"com_github_hashicorp_go_retryablehttp",
"com_github_spf13_afero",
"com_github_spf13_cobra",
"com_github_spf13_pflag",
"com_github_thediveo_enumflag",
"com_gitlab_metakeule_fmtdate",
"org_golang_x_exp",
"org_golang_x_text",
)

###########
## Other ##
###########

bazel_dep(name = "aspect_bazel_lib", version = "1.38.1")

http_archive = use_repo_rule("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")

http_archive(
name = "openapi_tools_generator_bazel",
sha256 = "ada94694b10f2503c52a48427bd8589323bff5910bd1a7e5212ce34702d0af65",
urls = ["https://github.com/OpenAPITools/openapi-generator-bazel/releases/download/v0.1.6/openapi-tools-generator-bazel-0.1.6.tar.gz"],
)
473 changes: 473 additions & 0 deletions client/MODULE.bazel.lock

Large diffs are not rendered by default.

19 changes: 19 additions & 0 deletions client/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Building Datasets Client
## Setup
The build system depends on having `bazel` available. You should use [bazelisk](https://github.com/bazelbuild/bazelisk).

You must also have the script `workspace_status.sh` available in your `PATH`.

## Building
From this directory, run:

```
bazel build apps/public/Datasets/...
```



The executable will be available at `bazel-bin/apps/public/Datasets/v2/cmd/datasets/datasets_/datasets`.

## Alternative installations
`datasets` is also available through [conda](https://anaconda.org/conda-forge/ncbi-datasets-cli).
7 changes: 7 additions & 0 deletions client/WORKSPACE.bzlmod
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#load("@datasets_client//bazel:repositories.bzl", "repositories")

# repositories()

load("@datasets_client//bazel:deps.bzlmod.bzl", gdh_deps = "deps")

gdh_deps()
Empty file.
16 changes: 16 additions & 0 deletions client/apps/public/Datasets/v2/cmd/datasets/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
load("@rules_go//go:def.bzl", "go_binary", "go_library")

# Library target wrapping main.go for the datasets command. It is private to
# this package and depends on the //apps/public/Datasets/v2/datasets library.
go_library(
name = "datasets_lib",
srcs = ["main.go"],
importpath = "datasets_cli/v2/cmd/datasets",
visibility = ["//visibility:private"],
deps = ["//apps/public/Datasets/v2/datasets"],
)

# The publicly visible `datasets` executable, built by embedding :datasets_lib.
# x_defs sets the Go variable datasets_cli/v2/datasets.AppVersion at link time
# to the {STABLE_GIT_COMMIT} placeholder.
# NOTE(review): the placeholder is presumably filled from the output of the
# workspace status command -- confirm against workspace_status.sh.
go_binary(
name = "datasets",
embed = [":datasets_lib"],
visibility = ["//visibility:public"],
x_defs = {"datasets_cli/v2/datasets.AppVersion": "{STABLE_GIT_COMMIT}"},
)
11 changes: 11 additions & 0 deletions client/apps/public/Datasets/v2/cmd/datasets/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
//go:generate ${GO_PATH}/bin/goversioninfo -manifest=../../resource/goversioninfo.exe.manifest -64 -product-version=${VER} -file-version=${VER}

package main

import (
"datasets_cli/v2/datasets"
)

// main is the program entry point; it delegates all work to datasets.Execute.
func main() {
datasets.Execute()
}
1 change: 1 addition & 0 deletions client/apps/public/Datasets/v2/datasets/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
*.go.dev
71 changes: 71 additions & 0 deletions client/apps/public/Datasets/v2/datasets/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
load("@rules_go//go:def.bzl", "go_library")

# Go library for the package imported as "datasets_cli/v2/datasets". It is
# publicly visible and is the sole dependency of the cmd/datasets library.
go_library(
name = "datasets",
# Every Go source file of the package must be listed explicitly.
srcs = [
"Download.go",
"DownloadGene.go",
"DownloadGeneAccession.go",
"DownloadGeneGeneId.go",
"DownloadGeneSymbol.go",
"DownloadGeneTaxon.go",
"DownloadGenome.go",
"DownloadGenomeAccession.go",
"DownloadGenomeTaxon.go",
"DownloadTaxonomy.go",
"DownloadTaxonomyTaxon.go",
"DownloadVirus.go",
"DownloadVirusGenome.go",
"DownloadVirusGenomeAccession.go",
"DownloadVirusGenomeTaxon.go",
"DownloadVirusProtein.go",
"GeneDownloader.go",
"GeneIdRetriever.go",
"GenomeAccessionRetriever.go",
"GenomeDownloader.go",
"PageProcessor.go",
"Rehydrate.go",
"ResolveTaxons.go",
"ResponseHandling.go",
"Summary.go",
"SummaryGene.go",
"SummaryGeneAccession.go",
"SummaryGeneId.go",
"SummaryGeneSymbol.go",
"SummaryGeneTaxon.go",
"SummaryGenome.go",
"SummaryGenomeAccession.go",
"SummaryGenomeTaxon.go",
"SummaryTaxonomy.go",
"SummaryTaxonomyTaxon.go",
"SummaryVirus.go",
"SummaryVirusGenome.go",
"SummaryVirusGenomeAccession.go",
"SummaryVirusGenomeTaxon.go",
"TaxonomyDownloader.go",
"TaxonomyIdRetriever.go",
"VirusDownloader.go",
"progressbar.go",
"root.go",
],
importpath = "datasets_cli/v2/datasets",
visibility = ["//visibility:public"],
# In-workspace targets (//...) come first, then external Go module
# repositories (@...).
deps = [
"//apps/public/Datasets/v2/datasets/flags",
"//go/util",
"//go/util/command",
"//openapi:golib.v2",
"@com_github_antihax_optional//:optional",
"@com_github_docker_go_units//:go-units",
"@com_github_gosuri_uiprogress//:uiprogress",
"@com_github_hashicorp_go_cleanhttp//:go-cleanhttp",
"@com_github_hashicorp_go_retryablehttp//:go-retryablehttp",
"@com_github_spf13_afero//:afero",
"@com_github_spf13_cobra//:cobra",
"@com_github_thediveo_enumflag//:enumflag",
"@com_gitlab_metakeule_fmtdate//:fmtdate",
"@org_golang_x_exp//slices",
"@org_golang_x_text//language",
"@org_golang_x_text//message",
],
)
162 changes: 162 additions & 0 deletions client/apps/public/Datasets/v2/datasets/Download.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
package datasets

import (
"archive/zip"
"errors"
"fmt"
"io"
_nethttp "net/http"
"os"

"github.com/gosuri/uiprogress"
"github.com/spf13/afero"
"github.com/spf13/cobra"
)

var (
argDownloadFilename string
)

// downloadDataForFile creates filename through the package filesystem afs and
// hands the open file, together with the HTTP response, to downloadData.
//
// resp is the response of the download request and inError the error that
// accompanied it. When resp is nil there is nothing to download, so the
// request failure is reported immediately, before any output file is created
// and without dereferencing resp. length and argSkipZipVal are passed through
// to downloadData unchanged.
//
// It returns an error when the request produced no response, when the output
// file cannot be created, or when downloadData fails.
func downloadDataForFile(resp *_nethttp.Response, inError error, filename string, length int64, argSkipZipVal bool) (err error) {
	if resp == nil {
		if inError == nil {
			inError = errors.New("no response received")
		}
		return fmt.Errorf("Error connecting to service: %s", inError)
	}

	f, e := afs.Create(filename)
	if e != nil {
		return fmt.Errorf("'%s' opening output file: %s", e, filename)
	}
	defer f.Close()

	// A response is available, so downloadData classifies the outcome by its
	// HTTP status code. inError is intentionally not forwarded on this path:
	// this function has never forwarded it, and doing so could replace the
	// status-specific messages (404, 429) with a generic connection error.
	// NOTE(review): confirm whether callers can pass a non-nil inError
	// together with a usable response.
	return downloadData(&f, resp, nil, filename, length, argSkipZipVal)
}

// downloadData streams the body of resp into *f and validates the result as a
// zip archive.
//
// Parameters:
//   - f: destination file; it is dereferenced and passed to the progress-bar
//     copier.
//   - resp: HTTP response of the download request.
//   - inError: error returned by the request; when non-nil it is reported as a
//     connection error and resp is not inspected.
//   - filename: name of the destination file, used as the progress-bar label,
//     for zip validation, and for removal when validation fails.
//   - length: not used by this function.
//   - argSkipZipVal: forwarded to isValidZip; when true the contents of the
//     archive entries are not read.
//
// The outcome depends on the HTTP status: 200 downloads and validates the
// package, while 404, 429 and any other status each yield a dedicated error.
// The response body is closed on every path.
func downloadData(f *afero.File, resp *_nethttp.Response, inError error, filename string, length int64, argSkipZipVal bool) (err error) {
	if inError != nil {
		return fmt.Errorf("Error connecting to service: %w", inError)
	}
	if resp == nil {
		return errors.New("Error connecting to service: no response received")
	}
	// Close the body for every status code, not only 200, so error responses
	// do not leak the underlying connection.
	if resp.Body != nil {
		defer resp.Body.Close()
	}

	switch resp.StatusCode {
	case _nethttp.StatusOK:
		progressBar := &copyProgressBar{}
		progressBar.filename = filename
		if _, e := progressBar.Copy(*f, resp.Body); e != nil {
			progressBar.status = "error"
			return fmt.Errorf("Download error: %w", e)
		}
		if !isValidZip(filename, argSkipZipVal) {
			// Best effort: the package is unusable either way.
			afs.Remove(filename) //nolint:errcheck
			if !argNoProgress {
				progressBar.status = "invalid zip archive"
			}
			return errors.New("Internal error (invalid zip archive). Please try again")
		}
		if !argNoProgress {
			if argSkipZipVal {
				progressBar.status = "valid zip structure -- files not checked"
			} else {
				progressBar.status = "valid data package"
			}
		}
	case _nethttp.StatusNotFound:
		err = errors.New("request does not match any items in our database")
	case _nethttp.StatusTooManyRequests:
		msg := `
Selected items are too large for direct download. Please add '--dehydrated' to the
command to download a zip archive with links to the required data. The full dataset can then
be retrieved by unzipping that file and then executing the command:
datasets rehydrate ncbi_dataset`
		err = errors.New(msg)
	default:
		err = fmt.Errorf("Unexpected Error: %s", resp.Status)
	}
	return err
}

// downloadCmd is the "download" parent command. It takes no positional
// arguments itself; the gene, genome, taxonomy and virus subcommands are
// attached to it in init, together with its persistent flags.
var downloadCmd = &cobra.Command{
Use: "download",
Short: "Download a gene, genome or virus dataset as a zip file",
Long: `
Download genome, gene and virus data packages, including sequence, annotation, and metadata, as a zip file.
Refer to NCBI's [download and install](https://www.ncbi.nlm.nih.gov/datasets/docs/v2/download-and-install/) documentation for information about getting started with the command-line tools.`,
Example: ` datasets download genome accession GCF_000001405.40 --chromosomes X,Y --exclude-gff3 --exclude-rna
datasets download genome taxon "bos taurus"
datasets download gene gene-id 672
datasets download gene symbol brca1 --taxon "mus musculus"
datasets download gene accession NP_000483.3
datasets download taxonomy taxon human,sars-cov-2
datasets download virus genome taxon sars-cov-2 --host dog
datasets download virus protein S --host dog --filename SARS2-spike-dog.zip`,
// Positional arguments are rejected on the bare "download" command.
Args: cobra.NoArgs,
// Start the shared progress renderer before the command runs, unless
// progress output has been disabled. This hook never returns an error.
PersistentPreRunE: func(cmd *cobra.Command, args []string) (err error) {
if !argNoProgress {
progress.Start()
}
return nil
},
// Stop the progress renderer after the command has run, mirroring the
// pre-run hook.
PersistentPostRun: func(cmd *cobra.Command, args []string) {
if !argNoProgress {
progress.Stop()
}
},
}

// isValidZip reports whether filename, read through the package filesystem
// afs, is a well-formed zip archive.
//
// The archive's central directory is always parsed and every entry is opened.
// Unless argSkipZipVal is true, each entry is also read to the end and its
// data discarded, so read errors inside an entry are detected. Any failure to
// stat, open or read yields false.
//
// Unless argNoProgress is set, a progress bar is added to the shared progress
// renderer and advanced once per entry.
//
// The archive file and every entry reader are closed before returning, so the
// caller is free to remove the file afterwards.
func isValidZip(filename string, argSkipZipVal bool) bool {
	var progressBar *uiprogress.Bar
	progressStatus := "Validating package"
	if !argNoProgress {
		progressBar = progress.AddBar(1)
		progressBar.Width = 2
		progressBar.PrependFunc(func(b *uiprogress.Bar) string { return progressStatus })
	}

	fileInfo, err := afs.Stat(filename)
	if err != nil {
		return false
	}

	file, err := afs.OpenFile(filename, os.O_RDONLY, os.FileMode(0644))
	if err != nil {
		return false
	}
	// Release the handle on every return path; an open handle can prevent the
	// caller from deleting an invalid archive on some platforms.
	defer file.Close()

	zipfile, err := zip.NewReader(file, fileInfo.Size())
	if err != nil {
		return false
	}

	if !argNoProgress && progressBar != nil && len(zipfile.File) > 0 {
		progressBar.Total = len(zipfile.File)
		progressBar.Width = 50
		progressBar.AppendCompleted()
		progressBar.AppendFunc(func(b *uiprogress.Bar) string { return fmt.Sprintf("%d/%d", b.Current(), b.Total) })
	}

	// entryOK opens one archive entry and, unless content checks are skipped,
	// drains it. It is a function so that the deferred Close runs once per
	// entry instead of piling up until isValidZip returns.
	entryOK := func(entry *zip.File) bool {
		r, err := entry.Open()
		if err != nil {
			return false
		}
		// The Close error is ignored: read failures are already reported by
		// the copy below.
		defer r.Close()
		if argSkipZipVal {
			return true
		}
		progressStatus = "Validating package files"
		_, err = io.Copy(io.Discard, r)
		return err == nil
	}

	for _, zippedfile := range zipfile.File {
		if !entryOK(zippedfile) {
			return false
		}
		if !argNoProgress && progressBar != nil {
			progressBar.Incr()
		}
	}

	return true
}

// init wires up the download command: it attaches the gene, genome, taxonomy
// and virus subcommands and registers the persistent --filename and
// --no-progressbar flags.
func init() {
	downloadCmd.AddCommand(
		createGeneCmd(),
		createGenomeCmd(),
		createTaxonomyCmd(),
		createVirusCmd(),
	)

	persistent := downloadCmd.PersistentFlags()
	persistent.StringVar(&argDownloadFilename, "filename", "ncbi_dataset.zip", "Specify a custom file name for the downloaded data package")
	persistent.BoolVar(&argNoProgress, "no-progressbar", false, "Hide progress bar")
}
Loading

0 comments on commit 8a6e7b2

Please sign in to comment.