diff --git a/README.md b/README.md index c25d6de..88f686d 100644 --- a/README.md +++ b/README.md @@ -69,10 +69,11 @@ The minimal command to evaluate the accordance between a truthset (generated dat nextflow run eval.nf -profile local,conda --callsets_dir ``` where `--callsets_dir` is the parameter to specify a folder containing the callset VCF files. -Currently, a callset within this folder has to follow the naming convention `callset_.vcf[.gz]` where _\_ is the integer of the corresponding truthset. -Callsets can optionally be _gzip_ compressed. - -🚧 For convenience, the `eval.nf` will get an option to provide a sample sheet as an alternative input format in the future. +Currently, a callset within this folder has to follow the naming convention `callset_.vcf[.gz]` where _\_ is the integer of the corresponding truthset. Alternatively, one can provide a sample sheet (","-delimited) with the columns "index", "callset" and "truthset", where "index" is a running number from 1 to n (number of samples) and "callset"/"truthset" are paths to the matching callset and truthset VCF files, respectively. The command is: +``` +nextflow run eval.nf -profile local,conda --sample_sheet +``` +Note: Callsets can optionally be _gzip_ compressed.
⚠️ Run commands from the root directory Without further ado, please run the commands from a terminal at the top folder (root directory) of this repository. diff --git a/eval.nf b/eval.nf index 9898119..69d5622 100644 --- a/eval.nf +++ b/eval.nf @@ -11,13 +11,13 @@ workflow{ ch_ref = Channel.value("$baseDir/" + params.reference) ch_ref_idx = SAMTOOLS_FAIDX(ch_ref) - if (params.callsets_dir != "") { + if (params.callsets_dir != "" && params.sample_sheet == "") { ch_callsets = Channel.fromPath(params.callsets_dir + "/" + "*.{vcf,vcf.gz}") ch_callsets .map { it -> tuple(it.toString().split('/')[-1].tokenize('_')[1].replaceFirst('.vcf', '').replaceFirst('.gz', '').toInteger(), file(it)) } .set {ch_callsets} - ch_callsets.view() + // ch_callsets.view() ch_truthsets = Channel.fromPath(params.outdir + "/" + "simulated_hap*.vcf") ch_truthsets @@ -29,17 +29,17 @@ workflow{ .set {ch_variantsets_map} // ch_variantsets_map.view() - } else if (params.sample_sheet != "") { + } else if (params.sample_sheet != "" && params.callsets_dir == "") { ch_variantsets_map = Channel .fromPath(params.sample_sheet, checkIfExists: true) .splitCsv(header: true, sep: ",") .map {row -> [row["index"] as Integer, row["callset"], row["truthset"]]} - .view() + // .view() } else { - exit 1, "ERROR: Either the sample_sheet or callsets_dir parameter has to be provided!\n" + exit 1, "ERROR: Data input incorrect - please supply only one of the following parameters: sample_sheet, callsets_dir\n" } diff --git a/nextflow.config b/nextflow.config index ff76e99..f49f4c6 100644 --- a/nextflow.config +++ b/nextflow.config @@ -11,12 +11,12 @@ manifest { params { // Individual parameters n = 3 - reference = 'ressources/MN908947.3.fasta' + reference = 'reference/Sars-Cov-2/Wuhan-Hu-1/MN908947.3.fasta' read_type = 'ngs' // General parameters seed = 479 - outdir = 'hap_results' + outdir = 'results' // NGS (WGS) - Read simulation parameters nb_frag = 3000