-
Notifications
You must be signed in to change notification settings - Fork 115
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add support for generating taxprofiler/funcscan input samplesheets for preprocessed FASTQs/FASTAs #688
base: dev
Are you sure you want to change the base?
Add support for generating taxprofiler/funcscan input samplesheets for preprocessed FASTQs/FASTAs #688
Changes from 12 commits
2a48e7f
3b80e0c
2bd8352
a744175
8672790
be91462
e9df126
e441e76
ddb9c96
bf11fb3
8724961
b354da9
f6b9a99
bec8347
67958ec
997674a
aa71298
0163690
535747c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Also, since FastQ files are being pulled from the publishDir, it might be a good idea to include options that override user inputs for params.publish_dir_mode (so that it is always 'copy' if a samplesheet is generated) and for params.save_clipped_reads, params.save_phixremoved_reads, etc., so that the preprocessed FastQ files are published to params.outdir whenever a downstream samplesheet is generated. |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
// | ||
// Subworkflow with functionality specific to the nf-core/mag pipeline | ||
// | ||
|
||
// | ||
// Write a taxprofiler-style samplesheet (CSV) whose FASTQ paths point into params.outdir | ||
// | ||
workflow SAMPLESHEET_TAXPROFILER { | ||
take: | ||
ch_reads | ||
|
||
main: | ||
format = 'csv' | ||
|
||
// Sub-directory of params.outdir holding the reads; the first matching option below wins | ||
def fastq_rel_path = '/' | ||
if (params.bbnorm) { | ||
fastq_rel_path = '/bbmap/bbnorm/' | ||
} else if (!params.keep_phix) { | ||
fastq_rel_path = '/QC_shortreads/remove_phix/' | ||
} | ||
else if (params.host_fasta) { | ||
fastq_rel_path = '/QC_shortreads/remove_host/' | ||
} | ||
else if (!params.skip_clipping) { | ||
fastq_rel_path = '/QC_shortreads/fastp/' | ||
} | ||
|
||
ch_list_for_samplesheet = ch_reads | ||
.map { | ||
meta, fastq -> | ||
def sample = meta.id | ||
def run_accession = meta.id | ||
def instrument_platform = "" | ||
// Normalise to a list so that a lone FASTQ (single-end) does not break the lookups below | ||
def reads = fastq instanceof List ? fastq : [fastq] | ||
def fastq_dir = file(params.outdir).toString() + fastq_rel_path + meta.id + '/' | ||
def fastq_1 = fastq_dir + reads[0].getName() | ||
// Leave the second-read column empty when there is no second FASTQ | ||
def fastq_2 = reads.size() > 1 ? fastq_dir + reads[1].getName() : "" | ||
def fasta = "" | ||
[ sample: sample, run_accession: run_accession, instrument_platform: instrument_platform, fastq_1: fastq_1, fastq_2: fastq_2, fasta: fasta ] | ||
} | ||
.tap{ ch_colnames } | ||
jfy133 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
// Name the output after the target pipeline, matching the funcscan samplesheet naming | ||
channelToSamplesheet(ch_list_for_samplesheet, "${params.outdir}/downstream_samplesheets/taxprofiler", format) | ||
|
||
} | ||
|
||
// | ||
// Write a two-column (sample, fasta) CSV samplesheet from the assemblies channel | ||
// | ||
workflow SAMPLESHEET_FUNCSCAN { | ||
take: | ||
ch_assemblies | ||
|
||
main: | ||
format = 'csv' | ||
|
||
ch_list_for_samplesheet = ch_assemblies | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Next thing which I don't think will be so complicated is to add another input channel for bins, and here make an if/else statement if they want to send just the raw assemblies (all contigs) or binned contigs to the samplesheet. It will need another pipeline level parameter too though |
||
.map { | ||
meta, filename -> | ||
def sample = meta.id | ||
// Path is rebuilt as <outdir>/Assembly/<meta.assembler>/<file name> rather than taken from the channel item | ||
def fasta = file(params.outdir).toString() + '/Assembly/' + meta.assembler + '/' + filename.getName() | ||
[ sample: sample, fasta: fasta ] | ||
} | ||
.tap{ ch_colnames } | ||
|
||
// Output file is "<outdir>/downstream_samplesheets/funcscan.<format>" | ||
channelToSamplesheet(ch_list_for_samplesheet, "${params.outdir}/downstream_samplesheets/funcscan", format) | ||
} | ||
|
||
// | ||
// Run the samplesheet subworkflow for each pipeline named in params.generate_pipeline_samplesheets | ||
// | ||
workflow GENERATE_DOWNSTREAM_SAMPLESHEETS { | ||
take: | ||
ch_reads | ||
ch_assemblies | ||
|
||
main: | ||
// Comma-separated pipeline names; each entry is trimmed so 'taxprofiler, funcscan' is accepted too | ||
def downstreampipeline_names = params.generate_pipeline_samplesheets.split(",")*.trim() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I've also implemented the same system in createtaxdb now, but with an additional input validation thing that you should also adopt here (i.e., to check that someone doesn't add an unsupported pipeline, or makes a typo). Check the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done |
||
|
||
if ( downstreampipeline_names.contains('taxprofiler') && params.save_clipped_reads ) { // save_clipped_reads must be true | ||
SAMPLESHEET_TAXPROFILER(ch_reads) | ||
} | ||
|
||
if ( downstreampipeline_names.contains('funcscan') ) { | ||
SAMPLESHEET_FUNCSCAN(ch_assemblies) | ||
} | ||
} | ||
|
||
// | ||
// Collect a channel of maps into a single delimited text file at "<path>.<format>": | ||
// the first line is built from the keys of the first map, the following lines from each map's values | ||
// | ||
def channelToSamplesheet(ch_list_for_samplesheet, path, format) { | ||
// Column separator per format: comma for csv, tab for tsv and txt | ||
// NOTE(review): a format outside csv/tsv/txt yields a null separator here; confirm callers only pass these | ||
def format_sep = [csv: ",", tsv: "\t", txt: "\t"][format] | ||
|
||
def ch_header = ch_list_for_samplesheet | ||
|
||
ch_header | ||
// Header line: key names of the first item only | ||
.first() | ||
.map { it.keySet().join(format_sep) } | ||
// Followed by one joined line of values per item | ||
.concat(ch_list_for_samplesheet.map { it.values().join(format_sep) }) | ||
.collectFile( | ||
name: "${path}.${format}", | ||
newLine: true, | ||
sort: false | ||
) | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -118,6 +118,11 @@ workflow PIPELINE_INITIALISATION { | |
// | ||
validateInputParameters( | ||
hybrid | ||
jfy133 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
// Validate samplesheet generation parameters | ||
if (params.generate_downstream_samplesheets && !params.generate_pipeline_samplesheets) { | ||
error('[nf-core/createtaxdb] If supplying `--generate_downstream_samplesheets`, you must also specify which pipeline to generate for with `--generate_pipeline_samplesheets! Check input.') | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Should this be nf-core/mag? |
||
} | ||
) | ||
|
||
// Validate PRE-ASSEMBLED CONTIG input when supplied | ||
|
@@ -330,6 +335,19 @@ def validateInputParameters(hybrid) { | |
if (params.save_mmseqs_db && !params.metaeuk_mmseqs_db) { | ||
error('[nf-core/mag] ERROR: Invalid parameter combination: --save_mmseqs_db supplied but no database has been requested for download with --metaeuk_mmseqs_db!') | ||
} | ||
|
||
// Validate samplesheet generation parameters | ||
if (params.generate_downstream_samplesheets && !params.generate_pipeline_samplesheets) { | ||
error('[nf-core/mag] If supplying `--generate_downstream_samplesheets`, you must also specify which pipeline to generate for with `--generate_pipeline_samplesheets! Check input.') | ||
} | ||
|
||
if (params.generate_downstream_samplesheets && !params.save_clipped_reads) { | ||
error('[nf-core/mag] Supplied --generate_downstream_samplesheets but missing --save_clipped_reads (mandatory for reads.gz output).') | ||
} | ||
|
||
if (params.generate_downstream_samplesheets && params.save_clipped_reads && (params.bbnorm || !params.keep_phix || params.host_fasta || params.skip_clipping)) { | ||
error('[nf-core/mag] Supplied --generate_downstream_samplesheets and --save_clipped_reads is true, but also need one of the following: --bbnorm true, or --keep_phix false, or --host_fasta true, or skip_clipping true.') | ||
} | ||
} | ||
|
||
// | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -25,6 +25,7 @@ include { ANCIENT_DNA_ASSEMBLY_VALIDATION } from '../subworkflows/local/ancient_ | |
include { DOMAIN_CLASSIFICATION } from '../subworkflows/local/domain_classification' | ||
include { DEPTHS } from '../subworkflows/local/depths' | ||
include { LONGREAD_PREPROCESSING } from '../subworkflows/local/longread_preprocessing' | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. spacing :D |
||
include { GENERATE_DOWNSTREAM_SAMPLESHEETS } from '../subworkflows/local/generate_downstream_samplesheets/main.nf' | ||
|
||
// | ||
// MODULE: Installed directly from nf-core/modules | ||
|
@@ -958,6 +959,13 @@ workflow MAG { | |
} | ||
} | ||
|
||
// | ||
// Samplesheet generation | ||
// | ||
if ( params.generate_downstream_samplesheets ) { | ||
GENERATE_DOWNSTREAM_SAMPLESHEETS ( ch_short_reads_assembly, ch_assemblies ) | ||
} | ||
|
||
// | ||
// Collate and save software versions | ||
// | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It looks like @jfy133 used only one workflow, which will selectively generate samplesheets based on params.generate_pipeline_samplesheets. Do you think it would be best to keep that consistent?