Merge pull request #42 from rki-mf1/feature_sample_sheet

added sample_sheet feature
rki-mf1 · May 6, 2024 · 921f7c3 · 921f7c3
2 parents a81ebd8 + 8b8f17e
commit 921f7c3
Show file tree

Hide file tree

Showing 3 changed files with 42 additions and 20 deletions.
diff --git a/README.md b/README.md
@@ -69,10 +69,11 @@ The minimal command to evaluate the accordance between a truthset (generated dat
 nextflow run eval.nf -profile local,conda --callsets_dir <path/to/callsets>
 ```
 where `--callsets_dir` is the parameter to specify a folder containing the callset VCF files.
-Currently, a callset within this folder has to follow the naming convention `callset_<X>.vcf[.gz]` where _\<X\>_ is the integer of the corresponding truthset.
-Callsets can optionally be _gzip_ compressed.
-
-🚧 For convenience, the `eval.nf` will get an option to provide a sample sheet as an alternative input format in the future.
+Currently, a callset within this folder has to follow the naming convention `callset_<X>.vcf[.gz]` where _\<X\>_ is the integer of the corresponding truthset. Alternatively, one can provide a sample sheet (","-delimited) with the columns "index", "callset" and "truthset", where "index" is a running number from 1 to n (the number of samples) and "callset"/"truthset" are the paths to the matching callset and truthset VCF files, respectively. The command is
+```
+nextflow run eval.nf -profile local,conda --sample_sheet <path/to/sample_sheet>
+```
+Note: Callsets can optionally be _gzip_ compressed.
 
 <details><summary>⚠️ Run commands from the root directory </summary>
 Without further ado, please run the commands from a terminal at the top folder (root directory) of this repository.

diff --git a/eval.nf b/eval.nf
@@ -11,22 +11,37 @@ workflow{
     ch_ref      = Channel.value("$baseDir/" + params.reference)
     ch_ref_idx  = SAMTOOLS_FAIDX(ch_ref)
 
-    ch_callsets = Channel.fromPath(params.callsets_dir + "/" + "*.{vcf,vcf.gz}")
-    ch_callsets
-        .map { it -> tuple(it.toString().split('/')[-1].tokenize('_')[1].replaceFirst('.vcf', '').replaceFirst('.gz', '').toInteger(), file(it)) }
-        .set {ch_callsets}
-    //ch_callsets.view()
-
-    ch_truthsets = Channel.fromPath(params.outdir + "/" + "simulated_hap*.vcf")
-    ch_truthsets
-        .map { it -> tuple(it.toString().split('/')[-1].tokenize('_')[1].replaceFirst('hap', '').replaceFirst('.vcf', '').toInteger(), file(it)) }
-        .set {ch_truthsets}
-    //ch_truthsets.view()
-
-    ch_truthsets.join(ch_callsets, by: 0)
-        .set {ch_variantsets_map}
-    //ch_variantsets_map.view()
+    if (params.callsets_dir != "" && params.sample_sheet == "") {
 
+        ch_callsets = Channel.fromPath(params.callsets_dir + "/" + "*.{vcf,vcf.gz}")
+        ch_callsets
+            .map { it -> tuple(it.toString().split('/')[-1].tokenize('_')[1].replaceFirst('.vcf', '').replaceFirst('.gz', '').toInteger(), file(it)) }
+            .set {ch_callsets}
+        // ch_callsets.view()
+
+        ch_truthsets = Channel.fromPath(params.outdir + "/" + "simulated_hap*.vcf")
+        ch_truthsets
+            .map { it -> tuple(it.toString().split('/')[-1].tokenize('_')[1].replaceFirst('hap', '').replaceFirst('.vcf', '').toInteger(), file(it)) }
+            .set {ch_truthsets}
+        // ch_truthsets.view()
+
+        ch_truthsets.join(ch_callsets, by: 0)
+            .set {ch_variantsets_map}
+        // ch_variantsets_map.view()
+
+    } else if (params.sample_sheet != "" && params.callsets_dir == "") { 
+
+        ch_variantsets_map = Channel
+            .fromPath(params.sample_sheet, checkIfExists: true)
+            .splitCsv(header: true, sep: ",")
+            .map {row -> [row["index"] as Integer, row["callset"], row["truthset"]]}
+            // .view()
+
+    } else {
+
+        exit 1, "ERROR: Data input incorrect - please supply only one of the following parameters: sample_sheet, callsets_dir\n"
+
+    }
 
     // ------------------
     // | Main processes |

diff --git a/nextflow.config b/nextflow.config
@@ -35,7 +35,8 @@ params {
     nb_reads = 180
 
     // Evaluation parameters
-    callsets_dir = 'data'
+    callsets_dir = ''
+    sample_sheet = ''
 }
 
 // Enable execution report
@@ -63,4 +64,9 @@ profiles {
         executor.name = "local"
         executor.cpus = 4
     }
+
+    slurm {
+        executor.name = "slurm"
+        executor.cpus = 4
+    }
 }