Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/as/dewrangle list #18

Merged
merged 4 commits into from
Oct 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -64,14 +64,15 @@ To perform dewrangling tasks, use the dewrangle command with subcommands:
```bash
d3b dewrangle
Subparser 'dewrangle'
usage: d3b dewrangle [-h] {hash,list_jobs,download} ...
usage: d3b dewrangle [-h] {hash,list_volume,list_jobs,download} ...

optional arguments:
-h, --help show this help message and exit

Dewrangle Subcommands:
{hash,list_jobs,download}
hash Hash volume in Dewrangle
list_volume List volume in Dewrangle
list_jobs List volume jobs in Dewrangle
download Download job results from Dewrangle
```
Expand Down
9 changes: 9 additions & 0 deletions d3b_dff_cli/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from .modules.validation.check_readgroup import main as check_readgroup
from .modules.validation.check_url import main as check_url
from .modules.dewrangle.volume import main as hash_volume
from .modules.dewrangle.volume import run_list as list_volume
from .modules.dewrangle.list_jobs import main as list_jobs
from .modules.dewrangle.download_job import main as download_dewrangle_job
from .modules.jira.create_ticket import main as create_ticket
Expand Down Expand Up @@ -99,6 +100,7 @@ def create_parser():

# Dewrangle commands
# hash: load a bucket to Dewrangle and hash it
# list: load a bucket to Dewrangle and list files in it
# list_jobs: list jobs run on a bucket
# download: download the results of a job
dewrangle_parser = subparsers.add_parser("dewrangle", help="Dewrangle commands")
Expand All @@ -113,6 +115,13 @@ def create_parser():
hash_parser = add_dewrangle_arguments(hash_parser)
hash_parser.set_defaults(func=hash_volume)

# list subcommand
list_vol_parser = dewrangle_subparsers.add_parser(
"list_volume", help="List volume in Dewrangle"
)
list_vol_parser = add_dewrangle_arguments(list_vol_parser)
list_vol_parser.set_defaults(func=list_volume)

# list_jobs subcommand
list_parser = dewrangle_subparsers.add_parser(
"list_jobs", help="List volume jobs in Dewrangle"
Expand Down
68 changes: 60 additions & 8 deletions d3b_dff_cli/modules/dewrangle/volume.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,17 +105,52 @@ def list_and_hash_volume(client, volume_id, billing_id):
return job_id


def load_and_hash_volume(
def list_volume(client, volume_id):
"""Run Dewrangle list volume mutation."""

# prepare mutation
mutation = gql(
"""
mutation VolumeListMutation($id: ID!) {
volumeList(id: $id) {
errors {
... on MutationError {
message
field
}
}
job {
id
}
}
}
"""
)

params = {"id": volume_id}

# run mutation
result = client.execute(mutation, variable_values=params)

hf.check_mutation_result(result)

job_id = result["volumeList"]["job"]["id"]

return job_id


def load_and_run_job(
bucket_name,
study_name,
region,
job_type,
prefix=None,
billing=None,
aws_cred=None,
token=None,
):
"""
Wrapper function that checks if a volume is loaded, and hashes it.
Wrapper function that checks if a volume is loaded, and either hashes or lists it.
Inputs: AWS bucket name, study name, aws region, and optional volume prefix.
Output: job id of parent job created when volume is hashed.
"""
Expand All @@ -129,9 +164,6 @@ def load_and_hash_volume(
study_id = hf.get_study_id(client, study_name)
org_id = hf.get_org_id_from_study(client, study_id)

# get billing group id
billing_group_id = hf.get_billing_id(client, org_id, billing)

# check if volume loaded to study
study_volumes = hf.get_study_volumes(client, study_id)
volume_id = hf.process_volumes(
Expand All @@ -147,8 +179,13 @@ def load_and_hash_volume(
client, study_id, prefix, region, bucket_name, aws_cred_id
)

# hash
job_id = list_and_hash_volume(client, volume_id, billing_group_id)
if job_type == "hash":
# get billing group id
billing_group_id = hf.get_billing_id(client, org_id, billing)
job_id = list_and_hash_volume(client, volume_id, billing_group_id)

elif job_type == "list":
job_id = list_volume(client, volume_id)

except Exception:
print(
Expand All @@ -161,12 +198,27 @@ def load_and_hash_volume(
return job_id


def run_list(args):
"""Other main function to load and list a volume."""
job_id = load_and_run_job(
args.bucket,
args.study,
args.region,
"list",
args.prefix,
args.billing,
args.credential,
)
print(job_id)


def main(args):
"""Main function. Call load_and_hash and output job_id."""
job_id = load_and_hash_volume(
job_id = load_and_run_job(
args.bucket,
args.study,
args.region,
"hash",
args.prefix,
args.billing,
args.credential,
Expand Down