From d023ddf8781ce295f5089c68010c20006d284c03 Mon Sep 17 00:00:00 2001 From: Alex Sickler Date: Mon, 14 Oct 2024 12:26:49 -0400 Subject: [PATCH 1/4] :sparkles: make option to hash or list volume --- d3b_dff_cli/modules/dewrangle/volume.py | 68 ++++++++++++++++++++++--- 1 file changed, 60 insertions(+), 8 deletions(-) diff --git a/d3b_dff_cli/modules/dewrangle/volume.py b/d3b_dff_cli/modules/dewrangle/volume.py index 016e61c..570b123 100644 --- a/d3b_dff_cli/modules/dewrangle/volume.py +++ b/d3b_dff_cli/modules/dewrangle/volume.py @@ -105,7 +105,41 @@ def list_and_hash_volume(client, volume_id, billing_id): return job_id -def load_and_hash_volume( +def list_volume(client, volume_id): + """Run Dewrangle list volume mutation.""" + + # prepare mutation + mutation = gql( + """ + mutation VolumeListMutation($id: ID!) { + volumeList(id: $id) { + errors { + ... on MutationError { + message + field + } + } + job { + id + } + } + } + """ + ) + + params = {"id": volume_id} + + # run mutation + result = client.execute(mutation, variable_values=params) + + hf.check_mutation_result(result) + + job_id = result["volumeListAndHash"]["job"]["id"] + + return job_id + + +def load_and_run_job( bucket_name, study_name, region, @@ -113,9 +147,10 @@ def load_and_hash_volume( billing=None, aws_cred=None, token=None, + job_type=str, ): """ - Wrapper function that checks if a volume is loaded, and hashes it. + Wrapper function that checks if a volume is loaded, and either hashes or lists it. Inputs: AWS bucket name, study name, aws region, and optional volume prefix. Output: job id of parent job created when volume is hashed. """ @@ -129,9 +164,6 @@ def load_and_hash_volume( study_id = hf.get_study_id(client, study_name) org_id = hf.get_org_id_from_study(client, study_id) - # get billing group id - billing_group_id = hf.get_billing_id(client, org_id, billing) - # check if volume loaded to study study_volumes = hf.get_study_volumes(client, study_id) volume_id = hf.process_volumes( @@ -147,8 +179,13 @@ def load_and_hash_volume( client, study_id, prefix, region, bucket_name, aws_cred_id ) - # hash - job_id = list_and_hash_volume(client, volume_id, billing_group_id) + if job_type == "hash": + # get billing group id + billing_group_id = hf.get_billing_id(client, org_id, billing) + job_id = list_and_hash_volume(client, volume_id, billing_group_id) + + elif job_type == "list": + job_id = list_volume(client, volume_id, None) except Exception: print( @@ -161,14 +198,29 @@ def load_and_hash_volume( return job_id +def run_list(args): + """Other main function to load and list a volume.""" + job_id = load_and_run_job( + args.bucket, + args.study, + args.region, + args.prefix, + args.billing, + args.credential, + "list", + ) + print(job_id) + + def main(args): """Main function. Call load_and_hash and output job_id.""" - job_id = load_and_hash_volume( + job_id = load_and_run_hash( args.bucket, args.study, args.region, args.prefix, args.billing, args.credential, + "hash", ) print(job_id) From 2b72465e963ac4196a18fae37c54dc24dcfb0bed Mon Sep 17 00:00:00 2001 From: Alex Sickler Date: Mon, 14 Oct 2024 12:36:26 -0400 Subject: [PATCH 2/4] add list_volume command to cli --- d3b_dff_cli/cli.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/d3b_dff_cli/cli.py b/d3b_dff_cli/cli.py index ecf27f4..5ce5328 100644 --- a/d3b_dff_cli/cli.py +++ b/d3b_dff_cli/cli.py @@ -5,6 +5,7 @@ from .modules.validation.check_readgroup import main as check_readgroup from .modules.validation.check_url import main as check_url from .modules.dewrangle.volume import main as hash_volume +from .modules.dewrangle.volume import run_list as list_volume from .modules.dewrangle.list_jobs import main as list_jobs from .modules.dewrangle.download_job import main as download_dewrangle_job from .modules.jira.create_ticket import main as create_ticket @@ -99,6 +100,7 @@ def create_parser(): # Dewrangle commands # hash: load a bucket to Dewrangle and hash it + # list: load a bucket to Dewrangle and list files in it # list_jobs: list jobs run on a bucket # download: download the results of a job dewrangle_parser = subparsers.add_parser("dewrangle", help="Dewrangle commands") @@ -113,6 +115,13 @@ def create_parser(): hash_parser = add_dewrangle_arguments(hash_parser) hash_parser.set_defaults(func=hash_volume) + # list subcommand + list_vol_parser = dewrangle_subparsers.add_parser( + "list_volume", help="List volume in Dewrangle" + ) + list_vol_parser = add_dewrangle_arguments(list_vol_parser) + list_vol_parser.set_defaults(func=list_volume) + # list_jobs subcommand list_parser = dewrangle_subparsers.add_parser( "list_jobs", help="List volume jobs in Dewrangle" From bdee24dc8c6008551b3a8f55853f1a36c50aefc8 Mon Sep 17 00:00:00 2001 From: Alex Sickler Date: Mon, 14 Oct 2024 14:11:26 -0400 Subject: [PATCH 3/4] minor adjustments to make code work --- d3b_dff_cli/modules/dewrangle/volume.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/d3b_dff_cli/modules/dewrangle/volume.py b/d3b_dff_cli/modules/dewrangle/volume.py index 570b123..270b807 100644 --- a/d3b_dff_cli/modules/dewrangle/volume.py +++ b/d3b_dff_cli/modules/dewrangle/volume.py @@ -134,7 +134,7 @@ def list_volume(client, volume_id): hf.check_mutation_result(result) - job_id = result["volumeListAndHash"]["job"]["id"] + job_id = result["volumeList"]["job"]["id"] return job_id @@ -143,11 +143,11 @@ def load_and_run_job( bucket_name, study_name, region, + job_type, prefix=None, billing=None, aws_cred=None, token=None, - job_type=str, ): """ Wrapper function that checks if a volume is loaded, and either hashes or lists it. @@ -185,7 +185,7 @@ def load_and_run_job( job_id = list_and_hash_volume(client, volume_id, billing_group_id) elif job_type == "list": - job_id = list_volume(client, volume_id, None) + job_id = list_volume(client, volume_id) except Exception: print( @@ -204,23 +204,23 @@ def run_list(args): args.bucket, args.study, args.region, + "list", args.prefix, args.billing, args.credential, - "list", ) print(job_id) def main(args): """Main function. Call load_and_hash and output job_id.""" - job_id = load_and_run_hash( + job_id = load_and_run_job( args.bucket, args.study, args.region, + "hash", args.prefix, args.billing, args.credential, - "hash", ) print(job_id) From c27476d9b230f5aacd2beea7226548b93c78bb62 Mon Sep 17 00:00:00 2001 From: Alex Sickler Date: Mon, 14 Oct 2024 14:14:44 -0400 Subject: [PATCH 4/4] update readme --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 90106a9..b396cff 100644 --- a/README.md +++ b/README.md @@ -64,7 +64,7 @@ To perform dewrangling tasks, use the dewrangle command with subcommands: ```bash d3b dewrangle Subparser 'dewrangle' -usage: d3b dewrangle [-h] {hash,list_jobs,download} ... +usage: d3b dewrangle [-h] {hash,list_volume,list_jobs,download} ... optional arguments: -h, --help show this help message and exit @@ -72,6 +72,7 @@ optional arguments: Dewrangle Subcommands: {hash,list_jobs,download} hash Hash volume in Dewrangle + list_volume List volume in Dewrangle list_jobs List volume jobs in Dewrangle download Download job results from Dewrangle ```