-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.nf
159 lines (122 loc) · 4.23 KB
/
main.nf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
// D2Ssect pipeline: counts k-mers per sample (jellyfish) and computes a
// pairwise d2s distance matrix, optionally restricting each sample's reads
// to those mapping to a target reference (and not to a background reference).
nextflow.enable.dsl=2
// bwa_index is included twice under different aliases so the target and
// background references each get their own independently cached index task.
include { bwa_index as index_target; bwa_index as index_background } from './modules/bwa.nf'
include { get_mapped_reads; concat_reads; get_mapped_reads_notbg } from './modules/samtools.nf'
include { d2s_pair; gather_d2s_matrix } from './modules/d2ssect.nf'
include { jellyfish_count } from './modules/jellyfish.nf'
include { count_reads } from './modules/shell.nf'
include { subsample_reads } from './modules/seqtk.nf'
params.target_ref=null // optional FASTA: keep only reads that map to this reference
params.background_ref=null // optional FASTA: additionally drop reads mapping here (only valid with target_ref)
params.outdir=null // required: output directory for results and sample_counts.txt
// After discarding samples with <n_reads, a random subsample of n_reads will be used
// If n_reads is not set the workflow will stop after calculating read depths
// to allow the user to choose a threshold
//
params.n_reads=null
// outdir is mandatory; fail fast before any work is scheduled.
if(!params.outdir){
log.error "No outdir provided. Provide one with --outdir myoutdir"
exit 1
}
// d2s: given a channel of (meta, reads) tuples, build one jellyfish k-mer
// database per sample, score every unordered sample pair with d2s_pair, and
// gather the pairwise scores into a single distance matrix.
workflow d2s {
take:
reads
main:
// One jellyfish k-mer count database per sample: emits (meta, jf).
ch_jf = reads | jellyfish_count
// Re-attach each sample's reads to its jellyfish db by matching meta key:
// emits (meta, fq, jf).
ch_d2s_inputs = reads.join(ch_jf)
// Self cross-product of all samples; keep each unordered pair exactly once
// by requiring m1.i < m2.i (meta.i is the per-sample index assigned
// upstream). Combinations failing that test evaluate to null.
// NOTE(review): null results are emitted into ch_pairs as-is here — confirm
// d2s_pair / the runtime tolerates them, or that a filter is intended.
ch_pairs = ch_d2s_inputs.combine(ch_d2s_inputs).map {
m1,fq1,jf1,m2,fq2,jf2 -> {
v=null
if (m1.i<m2.i){
v=[m1,fq1,jf1,m2,fq2,jf2]
}
v
}
}
// Collect all per-pair results so the matrix step runs once over everything.
pairs = d2s_pair(ch_pairs) | collect
gather_d2s_matrix(pairs)
}
// Entry workflow: parse the sample sheet, derive the working read set
// according to which references were supplied, then either report read
// depths (no n_reads) or subsample and run the d2s sub-workflow.
workflow {
ch_input_sample = extract_csv(file(params.samples, checkIfExists: true))
// Build (and cache via | collect) the bwa index for each supplied reference.
if (params.target_ref){
target_ref_fasta = Channel.fromPath(file(params.target_ref, checkIfExists:true)) | collect
target_ref_index = index_target(target_ref_fasta) | collect
}
if (params.background_ref){
background_ref_fasta = Channel.fromPath(file(params.background_ref, checkIfExists:true)) | collect
background_ref_index = index_background(background_ref_fasta)| collect
}
// There are three possible ref configs
// 1. No ref
// 2. target_ref only
// 3. Both target_ref and background_ref
if (!params.target_ref && !params.background_ref){
// No mapping: just merge each sample's fastq(s) into one read stream.
ch_reads = ch_input_sample | concat_reads
} else if (params.target_ref && !params.background_ref){
// Keep only reads that map to the target reference.
ch_reads = get_mapped_reads(ch_input_sample,target_ref_fasta,target_ref_index)
} else if (params.target_ref && params.background_ref){
// Keep reads mapping to target but not to background.
ch_reads = get_mapped_reads_notbg(ch_input_sample,target_ref_fasta,target_ref_index,background_ref_fasta,background_ref_index)
} else {
// Unsupported config: background_ref without target_ref.
// NOTE(review): this branch logs but does not `exit 1`, so execution
// continues with ch_reads undefined — confirm whether a hard stop is intended.
log.error "background_ref provided but not target_ref. When providing a single reference it must be labelled refa"
}
if (!params.n_reads){
// Discovery mode: report per-sample read counts so the user can pick a
// threshold, then rerun with --n_reads X -resume.
log.error "n_reads not provided. \n When this run is complete use read counts in <outdir>/sample_counts.txt to choose an appropriate value and then rerun with --n_reads X -resume where X is your chosen read threshold"
ch_counts = ch_reads.map {
count = file(it[1]).countFastq()
"${it[0].sample},${count}"
}
ch_counts.view()
ch_counts.collectFile(name:'sample_counts.txt', newLine: true, storeDir: params.outdir)
.subscribe {
println "Entries are saved to file: $it"
}
} else {
// Analysis mode: count reads per sample, drop samples below the
// threshold, renumber survivors with a contiguous index i (used by the
// d2s workflow for pair deduplication), subsample, and run d2s.
ch_counted_reads = ch_reads.map {
count = file(it[1]).countFastq()
it[0]['count'] = count
it
}
// NOTE(review): i is a script-binding variable mutated inside a channel
// operator; correctness relies on the map closure running serially.
i=1
ch_filtered_samples = ch_counted_reads.filter {
it[0].count >= params.n_reads
}.map {
it[0].i = i
i=i+1
it
}
ch_filtered_samples.view()
ch_sampled_reads = subsample_reads(ch_filtered_samples,params.n_reads)
d2s(ch_sampled_reads)
}
}
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
FUNCTIONS
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
/*
 * Resolve a possibly-relative path from the sample sheet.
 *
 * pathstring : a path string; absolute paths (leading '/') are returned
 *              unchanged, anything else is prefixed with params.base_path.
 * returns    : the resolved path string.
 *
 * NOTE(review): assumes params.base_path is set whenever the sample sheet
 * contains relative paths — if unset, this yields "null/<path>"; confirm
 * base_path is validated elsewhere.
 */
def resolve_path(pathstring){
    // A plain prefix check is clearer than the original regex-find (=~ /^\//)
    // and behaves identically for a leading-slash test.
    if (pathstring.startsWith('/')) {
        pathstring
    } else {
        "${params.base_path}/${pathstring}"
    }
}
/*
 * Parse the sample sheet CSV into a channel of [meta, reads] tuples.
 *
 * csv_file : sample sheet with a header row; columns 'sample' and 'fastq_1'
 *            are required, 'fastq_2' is optional (single-end when absent).
 * returns  : channel emitting [meta, reads] where meta carries sample name,
 *            single_end flag and a 1-based row index i (used downstream for
 *            pair ordering), and reads is [r1] or [r1, r2].
 */
def extract_csv(csv_file) {
    i=1
    Channel.from(csv_file).splitCsv(header: true)
        .map{ row ->
            // single_end is inferred from the presence of a second fastq column.
            def meta = [sample: row.sample, single_end: !row.fastq_2]
            def r1 = file(resolve_path(row.fastq_1), checkIfExists: true)
            def r2 = row.fastq_2 ? file(resolve_path(row.fastq_2), checkIfExists: true) : null
            // Assign and advance the shared row counter (script-binding variable,
            // also used by the main workflow's renumbering step).
            meta.i = i
            i=i+1
            def reads = meta.single_end ? [r1] : [r1, r2]
            // Defensive: drop any null entries before emitting.
            reads.removeAll([null])
            [meta,reads]
        }
}