diff --git a/README.md b/README.md index c25d6de..88f686d 100644 --- a/README.md +++ b/README.md @@ -69,10 +69,11 @@ The minimal command to evaluate the accordance between a truthset (generated dat nextflow run eval.nf -profile local,conda --callsets_dir <path/to/callsets> ``` where `--callsets_dir` is the parameter to specify a folder containing the callset VCF files. -Currently, a callset within this folder has to follow the naming convention `callset_<X>.vcf[.gz]` where _\<X\>_ is the integer of the corresponding truthset. -Callsets can optionally be _gzip_ compressed. - -🚧 For convenience, the `eval.nf` will get an option to provide a sample sheet as an alternative input format in the future. +Currently, a callset within this folder has to follow the naming convention `callset_<X>.vcf[.gz]` where _\<X\>_ is the integer of the corresponding truthset. Alternatively, one can provide a sample sheet (","-delimited) with the columns "index", "callset" and "truthset", where "index" is a running number from 1 to n (the number of samples) and "callset"/"truthset" are the paths to the matching callset and truthset VCF files. The command is: +``` +nextflow run eval.nf -profile local,conda --sample_sheet <path/to/sample_sheet> +``` +Note: Callsets can optionally be _gzip_ compressed.
⚠️ Run commands from the root directory Without further ado, please run the commands from a terminal at the top folder (root directory) of this repository. diff --git a/eval.nf b/eval.nf index 00b4c07..69d5622 100644 --- a/eval.nf +++ b/eval.nf @@ -11,22 +11,37 @@ workflow{ ch_ref = Channel.value("$baseDir/" + params.reference) ch_ref_idx = SAMTOOLS_FAIDX(ch_ref) - ch_callsets = Channel.fromPath(params.callsets_dir + "/" + "*.{vcf,vcf.gz}") - ch_callsets - .map { it -> tuple(it.toString().split('/')[-1].tokenize('_')[1].replaceFirst('.vcf', '').replaceFirst('.gz', '').toInteger(), file(it)) } - .set {ch_callsets} - //ch_callsets.view() - - ch_truthsets = Channel.fromPath(params.outdir + "/" + "simulated_hap*.vcf") - ch_truthsets - .map { it -> tuple(it.toString().split('/')[-1].tokenize('_')[1].replaceFirst('hap', '').replaceFirst('.vcf', '').toInteger(), file(it)) } - .set {ch_truthsets} - //ch_truthsets.view() - - ch_truthsets.join(ch_callsets, by: 0) - .set {ch_variantsets_map} - //ch_variantsets_map.view() + if (params.callsets_dir != "" && params.sample_sheet == "") { + ch_callsets = Channel.fromPath(params.callsets_dir + "/" + "*.{vcf,vcf.gz}") + ch_callsets + .map { it -> tuple(it.toString().split('/')[-1].tokenize('_')[1].replaceFirst('.vcf', '').replaceFirst('.gz', '').toInteger(), file(it)) } + .set {ch_callsets} + // ch_callsets.view() + + ch_truthsets = Channel.fromPath(params.outdir + "/" + "simulated_hap*.vcf") + ch_truthsets + .map { it -> tuple(it.toString().split('/')[-1].tokenize('_')[1].replaceFirst('hap', '').replaceFirst('.vcf', '').toInteger(), file(it)) } + .set {ch_truthsets} + // ch_truthsets.view() + + ch_truthsets.join(ch_callsets, by: 0) + .set {ch_variantsets_map} + // ch_variantsets_map.view() + + } else if (params.sample_sheet != "" && params.callsets_dir == "") { + + ch_variantsets_map = Channel + .fromPath(params.sample_sheet, checkIfExists: true) + .splitCsv(header: true, sep: ",") + .map {row -> [row["index"] as Integer, 
file(row["truthset"], checkIfExists: true), file(row["callset"], checkIfExists: true)]} // same tuple layout as the join branch: index, truthset, callset + // .view() + + } else { + + exit 1, "ERROR: Data input incorrect - please supply only one of the following parameters: sample_sheet, callsets_dir\n" + + } // ------------------ // | Main processes | diff --git a/nextflow.config b/nextflow.config index c984d9d..f49f4c6 100644 --- a/nextflow.config +++ b/nextflow.config @@ -35,7 +35,8 @@ params { nb_reads = 180 // Evaluation parameters - callsets_dir = 'data' + callsets_dir = '' + sample_sheet = '' } // Enable execution report @@ -63,4 +64,9 @@ profiles { executor.name = "local" executor.cpus = 4 } + + slurm { + executor.name = "slurm" + executor.cpus = 4 + } }