Skip to content

Commit

Permalink
Merge pull request #18 from d3b-center/feature/as/dewrangle_list
Browse files Browse the repository at this point in the history
Feature/as/dewrangle list
  • Loading branch information
sickler-alex authored Oct 15, 2024
2 parents 270b97b + c27476d commit 11c5fe1
Show file tree
Hide file tree
Showing 3 changed files with 71 additions and 9 deletions.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -64,14 +64,15 @@ To perform dewrangling tasks, use the dewrangle command with subcommands:
```bash
d3b dewrangle
Subparser 'dewrangle'
usage: d3b dewrangle [-h] {hash,list_jobs,download} ...
usage: d3b dewrangle [-h] {hash,list_volume,list_jobs,download} ...

optional arguments:
-h, --help show this help message and exit

Dewrangle Subcommands:
{hash,list_jobs,download}
hash Hash volume in Dewrangle
list_volume List volume in Dewrangle
list_jobs List volume jobs in Dewrangle
download Download job results from Dewrangle
```
Expand Down
9 changes: 9 additions & 0 deletions d3b_dff_cli/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from .modules.validation.check_readgroup import main as check_readgroup
from .modules.validation.check_url import main as check_url
from .modules.dewrangle.volume import main as hash_volume
from .modules.dewrangle.volume import run_list as list_volume
from .modules.dewrangle.list_jobs import main as list_jobs
from .modules.dewrangle.download_job import main as download_dewrangle_job
from .modules.jira.create_ticket import main as create_ticket
Expand Down Expand Up @@ -99,6 +100,7 @@ def create_parser():

# Dewrangle commands
# hash: load a bucket to Dewrangle and hash it
# list: load a bucket to Dewrangle and list files in it
# list_jobs: list jobs run on a bucket
# download: download the results of a job
dewrangle_parser = subparsers.add_parser("dewrangle", help="Dewrangle commands")
Expand All @@ -113,6 +115,13 @@ def create_parser():
hash_parser = add_dewrangle_arguments(hash_parser)
hash_parser.set_defaults(func=hash_volume)

# list subcommand
list_vol_parser = dewrangle_subparsers.add_parser(
"list_volume", help="List volume in Dewrangle"
)
list_vol_parser = add_dewrangle_arguments(list_vol_parser)
list_vol_parser.set_defaults(func=list_volume)

# list_jobs subcommand
list_parser = dewrangle_subparsers.add_parser(
"list_jobs", help="List volume jobs in Dewrangle"
Expand Down
68 changes: 60 additions & 8 deletions d3b_dff_cli/modules/dewrangle/volume.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,17 +105,52 @@ def list_and_hash_volume(client, volume_id, billing_id):
return job_id


def load_and_hash_volume(
def list_volume(client, volume_id):
"""Run Dewrangle list volume mutation."""

# prepare mutation
mutation = gql(
"""
mutation VolumeListMutation($id: ID!) {
volumeList(id: $id) {
errors {
... on MutationError {
message
field
}
}
job {
id
}
}
}
"""
)

params = {"id": volume_id}

# run mutation
result = client.execute(mutation, variable_values=params)

hf.check_mutation_result(result)

job_id = result["volumeList"]["job"]["id"]

return job_id


def load_and_run_job(
bucket_name,
study_name,
region,
job_type,
prefix=None,
billing=None,
aws_cred=None,
token=None,
):
"""
Wrapper function that checks if a volume is loaded, and hashes it.
Wrapper function that checks if a volume is loaded, and either hashes or lists it.
Inputs: AWS bucket name, study name, aws region, and optional volume prefix.
Output: job id of parent job created when volume is hashed.
"""
Expand All @@ -129,9 +164,6 @@ def load_and_hash_volume(
study_id = hf.get_study_id(client, study_name)
org_id = hf.get_org_id_from_study(client, study_id)

# get billing group id
billing_group_id = hf.get_billing_id(client, org_id, billing)

# check if volume loaded to study
study_volumes = hf.get_study_volumes(client, study_id)
volume_id = hf.process_volumes(
Expand All @@ -147,8 +179,13 @@ def load_and_hash_volume(
client, study_id, prefix, region, bucket_name, aws_cred_id
)

# hash
job_id = list_and_hash_volume(client, volume_id, billing_group_id)
if job_type == "hash":
# get billing group id
billing_group_id = hf.get_billing_id(client, org_id, billing)
job_id = list_and_hash_volume(client, volume_id, billing_group_id)

elif job_type == "list":
job_id = list_volume(client, volume_id)

except Exception:
print(
Expand All @@ -161,12 +198,27 @@ def load_and_hash_volume(
return job_id


def run_list(args):
"""Other main function to load and list a volume."""
job_id = load_and_run_job(
args.bucket,
args.study,
args.region,
"list",
args.prefix,
args.billing,
args.credential,
)
print(job_id)


def main(args):
"""Main function. Call load_and_hash and output job_id."""
job_id = load_and_hash_volume(
job_id = load_and_run_job(
args.bucket,
args.study,
args.region,
"hash",
args.prefix,
args.billing,
args.credential,
Expand Down

0 comments on commit 11c5fe1

Please sign in to comment.