Skip to content

Commit

Permalink
Update workflow dependencies (#20)
Browse files Browse the repository at this point in the history
* Update workflow dependencies

* Update process_datasets workflow

* Update run benchmark workflow

* Update changelog

* Refactor run benchmark workflow
  • Loading branch information
KaiWaldrant authored Sep 20, 2024
1 parent 10119ed commit f5021bb
Show file tree
Hide file tree
Showing 6 changed files with 40 additions and 274 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@

* Update test_resources path in components (PR #18).

* Update workflows to use core repository dependency (PR #20).

## BUG FIXES

* Update the nextflow workflow dependencies (PR #17).
Expand Down
5 changes: 3 additions & 2 deletions _viash.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -72,10 +72,11 @@ authors:
orcid: "0009-0003-8555-1361"

repositories:
- name: openproblems
- name: core
type: github
repo: openproblems-bio/openproblems
repo: openproblems-bio/core
tag: build/main
path: viash/core

viash_version: 0.9.0

Expand Down
4 changes: 2 additions & 2 deletions src/workflows/process_datasets/config.vsh.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ resources:
entrypoint: run_wf
- path: /common/nextflow_helpers/helper.nf
dependencies:
- name: common/check_dataset_schema
repository: openproblems
- name: schema/verify_data_structure
repository: core
- name: data_processors/process_dataset
runners:
- type: nextflow
121 changes: 2 additions & 119 deletions src/workflows/process_datasets/main.nf
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
include { findArgumentSchema } from "${meta.resources_dir}/helper.nf"

workflow auto {
findStatesTemp(params, meta.config)
findStates(params, meta.config)
| meta.workflow.run(
auto: [publish: "state"]
)
Expand All @@ -14,7 +14,7 @@ workflow run_wf {
main:
output_ch = input_ch

| check_dataset_schema.run(
| verify_data_structure.run(
fromState: { id, state ->
def schema = findArgumentSchema(meta.config, "input")
def schemaYaml = tempFile("schema.yaml")
Expand Down Expand Up @@ -52,120 +52,3 @@ workflow run_wf {
emit:
output_ch
}

// temp fix for rename_keys typo

// Temporary stand-in for the standard `findStates` helper (see the
// "temp fix for rename_keys typo" note above): builds a synthetic config
// whose arguments describe how to discover per-dataset `state.yaml` files,
// then returns a workflow that turns those state files into a channel of
// `[id, state]` tuples.
//
// Params:
//   params - the raw pipeline params map (cloned; not mutated)
//   config - the component's parsed Viash config map (cloned; not mutated)
// Returns:
//   a Nextflow workflow (`findStatesTempWf`) that emits `output_ch`,
//   a channel of `[id, state]` pairs, one per accepted state file.
//
// NOTE(review): `deepClone`, `processConfig`, `channelFromParams`,
// `helpMessage`, `readYamlBlob`, and `readTaggedYaml` are project helpers
// defined elsewhere — their exact semantics are assumed from usage here.
def findStatesTemp(Map params, Map config) {
  // work on copies so the caller's maps are left untouched
  def auto_config = deepClone(config)
  def auto_params = deepClone(params)

  auto_config = auto_config.clone()
  // override arguments: replace the component's own argument schema with
  // the fixed set of state-discovery arguments below
  auto_config.argument_groups = []
  auto_config.arguments = [
    [
      type: "string",
      name: "--id",
      description: "A dummy identifier",
      required: false
    ],
    // glob(s) pointing at the state.yaml files to ingest; ';'-separated
    [
      type: "file",
      name: "--input_states",
      example: "/path/to/input/directory/**/state.yaml",
      description: "Path to input directory containing the datasets to be integrated.",
      required: true,
      multiple: true,
      multiple_sep: ";"
    ],
    // optional regex applied to each state-file path (full match, see below)
    [
      type: "string",
      name: "--filter",
      example: "foo/.*/state.yaml",
      description: "Regex to filter state files by path.",
      required: false
    ],
    // to do: make this a yaml blob?
    // 'newKey:oldKey' pairs, ';'-separated, used to remap state keys
    [
      type: "string",
      name: "--rename_keys",
      example: ["newKey1:oldKey1", "newKey2:oldKey2"],
      description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.",
      required: false,
      multiple: true,
      multiple_sep: ";"
    ],
    // JSON blob of settings merged into every emitted state
    [
      type: "string",
      name: "--settings",
      example: '{"output_dataset": "dataset.h5ad", "k": 10}',
      description: "Global arguments as a JSON glob to be passed to all components.",
      required: false
    ]
  ]
  // default id when the caller supplied none
  if (!(auto_params.containsKey("id"))) {
    auto_params["id"] = "auto"
  }

  // run auto config through processConfig once more
  auto_config = processConfig(auto_config)

  // the returned workflow; Nextflow requires a named workflow definition
  workflow findStatesTempWf {
    helpMessage(auto_config)

    output_ch =
      channelFromParams(auto_params, auto_config)
        | flatMap { autoId, args ->

          // settings apply to every state; default to an empty map
          def globalSettings = args.settings ? readYamlBlob(args.settings) : [:]

          // look for state files in input dir
          def stateFiles = args.input_states

          // filter state files by regex (must match the WHOLE path,
          // since matcher.matches() is used rather than find())
          if (args.filter) {
            stateFiles = stateFiles.findAll{ stateFile ->
              def stateFileStr = stateFile.toString()
              def matcher = stateFileStr =~ args.filter
              matcher.matches()}
          }

          // read in states as [id, state] pairs; the id comes from the
          // 'id' field inside each state.yaml
          def states = stateFiles.collect { stateFile ->
            def state_ = readTaggedYaml(stateFile)
            [state_.id, state_]
          }

          // construct renameMap: newKey -> oldKey
          // NOTE(review): the assert message below says comma-separated,
          // but --rename_keys uses multiple_sep ";" — message looks stale.
          if (args.rename_keys) {
            def renameMap = args.rename_keys.collectEntries{renameString ->
              def split = renameString.split(":")
              assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey,newKey:oldKey'"
              split
            }

            // rename keys in state, only let states through which have all keys
            // also add global settings
            states = states.collectMany{id, state ->
              def newState = [:]

              for (key in renameMap.keySet()) {
                def origKey = renameMap[key]
                // drop the whole state if any required old key is missing
                if (!(state.containsKey(origKey))) {
                  return []
                }
                newState[key] = state[origKey]
              }

              // newState wins over globalSettings on key collisions
              [[id, globalSettings + newState]]
            }
          }

          states
        }
    emit:
    output_ch
  }

  return findStatesTempWf
}
6 changes: 2 additions & 4 deletions src/workflows/run_benchmark/config.vsh.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,8 @@ resources:
- type: file
path: /_viash.yaml
dependencies:
- name: common/check_dataset_schema
repository: openproblems
- name: common/extract_metadata
repository: openproblems
- name: h5ad/extract_uns_metadata
repository: core
- name: control_methods/no_denoising
- name: control_methods/perfect_denoising
- name: methods/alra
Expand Down
Loading

0 comments on commit f5021bb

Please sign in to comment.