From b4f3f1f949229d52b82b45a35fee80e516f6e055 Mon Sep 17 00:00:00 2001 From: Flip Mulder <47565214+ffmmulder@users.noreply.github.com> Date: Sun, 4 Jul 2021 09:20:27 +0200 Subject: [PATCH 1/4] Use common main config files for common data Extract processes, resources and nextflow settings from all custom config files to main config files for re-use and easier modifications --- conf/nextflow.config | 59 ++++++++ conf/process.config | 318 ++++++++++++++++++++++++++++++++++++++++++ conf/resources.config | 105 ++++++++++++++ 3 files changed, 482 insertions(+) create mode 100644 conf/nextflow.config create mode 100644 conf/process.config create mode 100644 conf/resources.config diff --git a/conf/nextflow.config b/conf/nextflow.config new file mode 100644 index 0000000..1377183 --- /dev/null +++ b/conf/nextflow.config @@ -0,0 +1,59 @@ +manifest { + homePage = 'https://github.com/UMCUGenetics/RNASeq-NF' + description = 'RNASeq-NF is an NGS analysis pipeline for RNA expression quantification' + mainScript = 'main.nf' + version = '1.1' + nextflowVersion = '20.04.1' +} + +executor { + $sge { + queueSize = 1000 + pollInterval = '30sec' + queueStatInterval = '5min' + } + $slurm { + queueSize = 1000 + pollInterval = '30sec' + queueStatInterval = '5min' + } +} + +profiles { + sge { + process.executor = 'sge' + process.queue = 'all.q' + process.clusterOptions = '-P compgen ' + } + + slurm { + process.executor = 'slurm' + process.clusterOptions = '--gres=tmpspace:25G' + process.queue = 'cpu' + } +} + +report { + enabled = true + file = "$params.out_dir/log/nextflow_report.html" +} + +trace { + enabled = true + file = "$params.out_dir/log/nextflow_trace.txt" + fields = 'task_id,hash,native_id,process,tag,name,status,exit,module,container,cpus,time,disk,memory,attempt,submit,start,complete,duration,realtime,queue,%cpu,%mem,rss,vmem,peak_rss,peak_vmem,rchar,wchar,syscr,syscw,read_bytes,write_bytes,vol_ctxt,inv_ctxt' +} + +timeline { + enabled = true + file = 
"$params.out_dir/log/nextflow_timeline.html" +} + +singularity { + enabled = true + autoMounts = true + runOptions = '-B /hpc -B $TMPDIR:$TMPDIR' + cacheDir = '/hpc/local/CentOS7/cog_bioinf/singularity_cache' +} + +cleanup = true diff --git a/conf/process.config b/conf/process.config new file mode 100644 index 0000000..6aa8c32 --- /dev/null +++ b/conf/process.config @@ -0,0 +1,318 @@ +params { + // CustomQC template + rmd_template = "$baseDir/bin/RNASeqNF_QC.Rmd" + + // Standard references + rRNA_database_manifest = "$baseDir/assets/sortmerna-db-default.txt" + + // Custom settings of tools. + options.FastQC = '' + options.TrimGalore = '--fastqc' + options.SortMeRNA = '' + options.STAR = '--outReadsUnmapped Fastx' + options.FeatureCounts = '' + options.Salmon_quant = '--seqBias --useVBOpt --gcBias' + options.Salmon_index = '' + options.Salmon_quantmerge = '' + options.GATK4_SplitIntervals = '-SCATTER_COUNT 100 --UNIQUE true -M BALANCING_WITHOUT_INTERVAL_SUBDIVISION_WITH_OVERFLOW' + options.GATK4_HaplotypeCaller = '--standard-min-confidence-threshold-for-calling 20.0 --dont-use-soft-clipped-bases' + options.GATK4_VariantFiltration = '--cluster-window-size 35 --cluster-size 3' + options.GATK4_BQRS = '' + options.Sambamba_MergeBams = '' + options.Preseq = "-v -B -D" + options.MultiQC = '' + + //featureCounts + fc_group_features = 'gene_id' + fc_count_type = 'exon' + fc_group_features_type = 'gene_biotype' + fc_extra_attributes = 'gene_name' + normalize_counts = true + biotypeQC = true + + // Salmon additional options + saveUnaligned = false + + //Memory settings for SGE profile + sambambamarkdup.mem = '32G' + splitintervals.mem = '12G' + baserecalibrator.mem = '9G' + gatherbaserecalibrator.mem = '6G' + applybqsr.mem = '9G' + haplotypecaller.mem = '25G' + mergevcf.mem = '14G' + variantfiltration.mem = '10G' + mergebams.mem = '5G' + multiqc.mem = '4G' +} + +process { + errorStrategy = 'retry' + maxRetries = 1 + withLabel : STAR_2_7_3a_AlignReads { + time = '24h' + penv = 
'threaded' + cpus = 4 + memory = '35G' + publishDir.path = "${params.out_dir}/STAR" + publishDir.mode = 'copy' + publishDir.saveAs = {filename -> if (filename.indexOf("_Log.out") > 0) "logs/$filename" + else if (filename.indexOf("_Log.final.out") > 0) "logs/$filename" + else if (filename.indexOf(".bam") > 0) "mapped/$filename" + else if (filename.indexOf("_SJ.out.tab") > 0) "SJ/$filename" + else if (filename.indexOf(".mate1") > 0) "unmapped/$filename" + else if (filename.indexOf(".mate2") > 0) "unmapped/$filename" } + + } + withLabel : Sambamba_0_7_0_Index { + time = '24h' + penv = 'threaded' + cpus = 2 + memory = '10G' + publishDir.path = "${params.out_dir}/STAR" + publishDir.mode = 'copy' + publishDir.saveAs = {filename -> if (filename.indexOf(".bai") > 0) "mapped/$filename" } + } + withLabel : Sambamba_0_7_0_Flagstat { + time = '24h' + penv = 'threaded' + cpus = 2 + memory = '5G' + publishDir.path = "${params.out_dir}/Sambamba/Flagstats" + publishDir.mode = 'copy' + + } + withLabel: RSeQC_3_0_1 { + time = '24h' + penv = 'threaded' + cpus = 2 + memory = '25G' + publishDir.path = "${params.out_dir}/QC/RSeQC" + publishDir.mode = 'copy' + publishDir.saveAs = {filename -> + if (filename.indexOf("bam_stat.txt") > 0) "bam_stat/$filename" + else if (filename.indexOf("infer_experiment.txt") > 0) "infer_experiment/$filename" + else if (filename.indexOf("read_distribution.txt") > 0) "read_distribution/$filename" + else if (filename.indexOf("read_duplication.DupRate_plot.pdf") > 0) "read_duplication/$filename" + else if (filename.indexOf("read_duplication.DupRate_plot.r") > 0) "read_duplication/rscripts/$filename" + else if (filename.indexOf("read_duplication.pos.DupRate.xls") > 0) "read_duplication/dup_pos/$filename" + else if (filename.indexOf("read_duplication.seq.DupRate.xls") > 0) "read_duplication/dup_seq/$filename" + else if (filename.indexOf("RPKM_saturation.eRPKM.xls") > 0) "RPKM_saturation/rpkm/$filename" + else if (filename.indexOf("RPKM_saturation.rawCount.xls") 
> 0) "RPKM_saturation/counts/$filename" + else if (filename.indexOf("RPKM_saturation.saturation.pdf") > 0) "RPKM_saturation/$filename" + else if (filename.indexOf("RPKM_saturation.saturation.r") > 0) "RPKM_saturation/rscripts/$filename" + else if (filename.indexOf("inner_distance.txt") > 0) "inner_distance/$filename" + else if (filename.indexOf("inner_distance_freq.txt") > 0) "inner_distance/data/$filename" + else if (filename.indexOf("inner_distance_plot.r") > 0) "inner_distance/rscripts/$filename" + else if (filename.indexOf("inner_distance_plot.pdf") > 0) "inner_distance/plots/$filename" + else if (filename.indexOf("junction_plot.r") > 0) "junction_annotation/rscripts/$filename" + else if (filename.indexOf("junction.xls") > 0) "junction_annotation/data/$filename" + else if (filename.indexOf(".junction_annotation_log.txt") > 0) "junction_annotation/log/$filename" + else if (filename.indexOf("splice_events.pdf") > 0) "junction_annotation/events/$filename" + else if (filename.indexOf("splice_junction.pdf") > 0) "junction_annotation/junctions/$filename" + else if (filename.indexOf("junctionSaturation_plot.pdf") > 0) "junction_saturation/$filename" + else if (filename.indexOf("junctionSaturation_plot.r") > 0) "junction_saturation/rscripts/$filename" + else filename } + } + withLabel: RSeQC_3_0_1_TIN { + time = '24h' + penv = 'threaded' + cpus = 2 + memory = '35G' + publishDir.path = "${params.out_dir}/QC/RSeQC/TIN" + publishDir.mode = 'copy' + } + withLabel: Preseq_2_0_3 { + time = '24h' + penv = 'threaded' + cpus = 2 + memory = '15G' + publishDir.path = "${params.out_dir}/QC/Preseq" + publishDir.mode = 'copy' + publishDir.saveAs = {filename -> if (filename.indexOf(".ccurve.txt") > 0) "complexity_curve/$filename" } + } + withLabel : Sambamba_0_7_0_Markdup { + time = '24h' + penv = 'threaded' + cpus = 6 + memory = '32G' + //publishDir.path = "${params.out_dir}/Sambamba/Markdup" + //publishDir.mode = 'copy' + } + withLabel : GATK_4_1_3_0_SplitNCigarReads { + time = 
'24h' + penv = 'threaded' + cpus = 2 + memory = '12G' + //publishDir.path = "${params.out_dir}/GATK/bam" + } + withLabel : Salmon_1_2_1_Quant { + time = '24h' + penv = 'threaded' + cpus = 2 + memory = '25G' + publishDir.path = "${params.out_dir}/Salmon" + publishDir.mode = 'copy' + } + withLabel : Salmon_1_2_1_QuantMerge { + time = '24h' + penv = 'threaded' + cpus = 2 + memory = '5G' + publishDir.path = "${params.out_dir}/Salmon/merged" + publishDir.mode = 'copy' + } + withLabel : Salmon_1_2_1_Index { + time = '24h' + penv = 'threaded' + cpus = 2 + memory = '25G' + publishDir.path = "${params.out_dir}/reference_resources/Salmon" + publishDir.mode = 'copy' + } + withLabel: GATK_4_1_3_0_SplitIntervals { + memory = '12G' + time = '1h' + } + withLabel : GATK_4_1_3_0_BaseRecalibration { + memory = '9G' + time = '1h' + } + withLabel : GATK_4_1_3_0_GatherBaseRecalibrationTables { + memory = '6G' + time = '1h' + } + withLabel : GATK_4_1_3_0_BaseRecalibrationTable { + memory = '9G' + time = '1h' + } + withLabel : GATK_4_1_3_0_HaplotypeCaller { + memory = '25G' + time = '12h' + } + withLabel : GATK_4_1_3_0_MergeVCFs { + memory = '14G' + time = '4h' + publishDir.path = "${params.out_dir}/GATK/vcf/raw" + publishDir.mode = 'copy' + } + withLabel : GATK_4_1_3_0_VariantFiltration { + memory = '10G' + time = '4h' + publishDir.path = "${params.out_dir}/GATK/vcf/filtered" + publishDir.mode = 'copy' + } + withLabel : Sambamba_0_7_0_Merge { + cpus = 10 + penv = 'threaded' + memory = '5G' + time = '2h' + //publishDir.path = "${params.out_dir}/GATK/bam" + } + withLabel : MultiQC_1_9 { + memory = '4G' + time = '1h' + publishDir.path = "${params.out_dir}/report/MultiQC" + publishDir.mode = 'copy' + } + withLabel : rnaseqnfqc_1_0_0 { + memory = '4G' + time = '1h' + publishDir.path = "${params.out_dir}/report/Custom_QC" + publishDir.mode = 'copy' + } + withLabel : biconductor_3_20_7_edger_normalize { + memory = '5G' + time = '1h' + publishDir.path = "${params.out_dir}/featureCounts" + 
publishDir.mode = 'copy' + } + withLabel : biconductor_1_28_0_deseq2normalize { + memory = '5G' + time = '1h' + publishDir.path = "${params.out_dir}/featureCounts" + publishDir.mode = 'copy' + + } + withLabel : Subread_2_0_0_FeatureCounts { + time = '24h' + penv = 'threaded' + cpus = 10 + memory = '25G' + publishDir.path = "${params.out_dir}/featureCounts" + publishDir.mode = 'copy' + publishDir.saveAs = {filename -> if (filename.indexOf("_biotype_") > 0) "biotypes/$filename" + else filename } + + } + withLabel : STAR_2_7_3a_GenomeGenerate { + time = '24h' + penv = 'threaded' + cpus = 12 + memory = '150G' + publishDir.path = "${params.out_dir}/reference_resources/STAR" + publishDir.mode = 'copy' + } + withLabel : ucsc_377_GenePredToBed { + time = '24h' + penv = 'threaded' + cpus = 2 + memory = '15G' + publishDir.path = "${params.out_dir}/reference_resources" + publishDir.mode = 'copy' + } + withLabel : ucsc_377_GtfToGenePred { + time = '24h' + penv = 'threaded' + cpus = 2 + memory = '15G' + } + withLabel : CreateIntervalList { + time = '1h' + publishDir.path = "${params.out_dir}/reference_resources" + publishDir.mode = 'copy' + } + withLabel : FASTQC_0_11_8 { + time = '1h' + publishDir.path = "${params.out_dir}/QC/FastQC/raw" + publishDir.mode = 'copy' + } + withLabel : SortMeRNA_4_2_0 { + time = '24h' + penv = 'threaded' + cpus = 4 + memory = '15G' + publishDir.path = "${params.out_dir}/QC/" + publishDir.mode = 'copy' + publishDir.saveAs = {filename -> + if (filename.indexOf("_rRNA_report.txt") > 0) "SorteMeRNA/logs/$filename" + else if (filename.indexOf("_filtered_rRNA.fastq.gz") > 0) "SorteMeRNA/rRNA-reads/$filename" + else null } + + } + withLabel : SortMeRNA_4_3_3 { + time = '24h' + penv = 'threaded' + cpus = 4 + memory = '15G' + publishDir.path = "${params.out_dir}/QC/" + publishDir.mode = 'copy' + publishDir.saveAs = {filename -> + if (filename.indexOf("_rRNA_report.txt") > 0) "SorteMeRNA/logs/$filename" + else if (filename.indexOf("_filtered_rRNA.fastq.gz") 
> 0) "SorteMeRNA/rRNA-reads/$filename" + else null } + + } + withLabel : TrimGalore_0_6_5 { + time = '24h' + penv = 'threaded' + cpus = 2 + memory = '6G' + publishDir.path = "${params.out_dir}/QC/TrimGalore" + publishDir.mode = 'copy' + publishDir.saveAs = {filename -> + if (filename.indexOf("_fastqc") > 0) "FastQC/$filename" + else if (filename.indexOf("trimming_report.txt") > 0) "logs/$filename" + else null } + } +} \ No newline at end of file diff --git a/conf/resources.config b/conf/resources.config new file mode 100644 index 0000000..c163c51 --- /dev/null +++ b/conf/resources.config @@ -0,0 +1,105 @@ +params { + + //Default resource folder + resource_dir = "/hpc/ubec/resources/" + + //These are set to empty by default and can/must be overwritten per genome when required (only works with empty values?) + genome_bed = "" + genome_known_sites = [] + scatter_interval_list = "" + salmon_index = "" + transcripts_fasta = "" + + genomes { + "Felis_Catus_9.0" { + genome_fasta = "${params.resource_dir}/genomes/Felis_catus_9.0_ensembl/Felis_catus_9.0.dna.toplevel.fa" + genome_gtf = "${params.resource_dir}/genomes/Felis_catus_9.0_ensembl/Felis_catus_9.0.dna.toplevel.gtf" + genome_bed = "" + genome_dict = "${params.resource_dir}/genomes/Felis_catus_9.0_ensembl/Felis_catus_9.0.dna.toplevel.dict" + genome_index = "${params.resource_dir}/genomes/Felis_catus_9.0_ensembl/Felis_catus_9.0.dna.toplevel.fa.fai" + star_index = "${params.resource_dir}/tools/star/2.7.3a/Felis_catus_9.0" + } + "GRCh37" { + genome_fasta = "${params.resource_dir}/genomes/GRCh37/Sequence/genome.fa" + genome_gtf = "${params.resource_dir}/genomes/GRCh37/Annotation/Homo_sapiens.GRCh37.74.gtf" + genome_bed = "${params.resource_dir}/genomes/GRCh37/Annotation/Homo_sapiens.GRCh37.74.sorted.bed12" + genome_dict = "${params.resource_dir}/genomes/GRCh37/Sequence/genome.dict" + genome_index = "${params.resource_dir}/genomes/GRCh37/Sequence/genome.fa.fai" + genome_known_sites = 
["${params.resource_dir}/genomes/GRCh37/Annotation/1000G_phase1.indels.b37.vcf.gz", + "${params.resource_dir}/genomes/GRCh37/Annotation/dbsnp_138.b37.vcf.gz", + "${params.resource_dir}/genomes/GRCh37/Annotation/Mills_and_1000G_gold_standard.indels.b37.vcf.gz"] + scatter_interval_list = "${params.resource_dir}/genomes/GRCh37/Sequence/genome.interval_list" + star_index = "${params.resource_dir}/tools/star/2.7.3a/GRCh37" + salmon_index = "${params.resource_dir}/tools/salmon/1.2.1/Homo_sapiens.GRCh37.75.cdna.all" + transcripts_fasta = "${params.resource_dir}/tools/rsem/transcripts/Homo_sapiens.GRCh37/GRCh37.transcripts.fa" + } + "GRCh38" { + genome_fasta = "${params.resource_dir}/genomes/GRCh38/GCA_000001405.15_GRCh38_no_alt_analysis_set.fna" + genome_gtf = "${params.resource_dir}/tools/rnaseq-nf/genome_files/GRCh38/Homo_sapiens.GRCh38.104.chr.ncbi.gtf" + genome_bed = "${params.resource_dir}/tools/rnaseq-nf/genome_files/GRCh38/Homo_sapiens.GRCh38.104.chr.ncbi.sorted.bed12" + genome_dict = "${params.resource_dir}/genomes/GRCh38/GCA_000001405.15_GRCh38_no_alt_analysis_set.dict" + genome_index = "${params.resource_dir}/genomes/GRCh38/GCA_000001405.15_GRCh38_no_alt_analysis_set.fna.fai" + genome_known_sites = ["${params.resource_dir}/tools/rnaseq-nf/genome_files/GRCh38/GATK_ResourceBundle/Homo_sapiens_assembly38.known_indels.vcf.gz", + "${params.resource_dir}/tools/rnaseq-nf/genome_files/GRCh38/GATK_ResourceBundle/resources_broad_hg38_v0_Homo_sapiens_assembly38.dbsnp138.vcf.gz", + "${params.resource_dir}/tools/rnaseq-nf/genome_files/GRCh38/GATK_ResourceBundle/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz"] + scatter_interval_list = "${params.resource_dir}/tools/rnaseq-nf/genome_files/GRCh38/GCA_000001405.15_GRCh38_no_alt_analysis_set.interval_list" + star_index = "${params.resource_dir}/tools/star/2.7.3a/GRCh38" + } + "GRCm38" { + genome_fasta = "${params.resource_dir}/genomes/Mus_musculus.GRCm38/Mm_GRCm38_gatk_sorted.fasta" + genome_index = 
"${params.resource_dir}/genomes/Mus_musculus.GRCm38/Mm_GRCm38_gatk_sorted.fasta.fai" + genome_dict = "${params.resource_dir}/genomes/Mus_musculus.GRCm38/Mm_GRCm38_gatk_sorted.dict" + genome_gtf = "${params.resource_dir}/genomes/Mus_musculus.GRCm38/Mus_musculus.GRCm38.70.gtf" + genome_bed = "${params.resource_dir}/genomes/Mus_musculus.GRCm38/Mus_musculus.GRCm38.70.sorted.bed12" + star_index = "${params.resource_dir}/tools/star/2.7.3a/Mm_GRCm38_gatk_sorted/" + } + "GRCh37_GRCm38" { + genome_fasta = "${params.resource_dir}/genomes/GRCh37_MmGrcm38/Human_GRCh37_Mm_GRCm38.fasta" + genome_gtf = "${params.resource_dir}/genomes/GRCh37_MmGrcm38/GRCh37_GRCm38.gtf" + genome_bed = "" + genome_dict = "${params.resource_dir}/genomes/GRCh37_MmGrcm38/Human_GRCh37_Mm_GRCm38.dict" + genome_index = "${params.resource_dir}/genomes/GRCh37_MmGrcm38/Human_GRCh37_Mm_GRCm38.fa.fai" + genome_known_sites = ["${params.resource_dir}/genomes/GRCh37/Annotation/1000G_phase1.indels.b37.vcf.gz", + "${params.resource_dir}/genomes/GRCh37/Annotation/dbsnp_138.b37.vcf.gz", + "${params.resource_dir}/genomes/GRCh37/Annotation/Mills_and_1000G_gold_standard.indels.b37.vcf.gz"] + scatter_interval_list = "${params.resource_dir}/genomes/GRCh37_MmGrcm38/Human_GRCh37_Mm_GRCm38.interval_list" + star_index = "${params.resource_dir}/tools/star/2.7.3a/GRCh37_GRCm38" + } + "CanFam3.1" { + genome_fasta = "${params.resource_dir}/genomes/CanFam3.1.103/Canis_lupus_familiaris.CanFam3.1.dna.toplevel.fa" + genome_index = "${params.resource_dir}/genomes/CanFam3.1.103/Canis_lupus_familiaris.CanFam3.1.dna.toplevel.fa.fai" + genome_dict = "${params.resource_dir}/genomes/CanFam3.1.103/Canis_lupus_familiaris.CanFam3.1.dna.toplevel.dict" + genome_gtf = "${params.resource_dir}/genomes/CanFam3.1.103/Canis_lupus_familiaris.CanFam3.1.103.gtf" + genome_bed = "" + star_index = "${params.resource_dir}/tools/star/2.7.3a/CanFam3.1.103" + } + "ITAG4.0" { + genome_fasta = 
"${params.resource_dir}/genomes/Solanum_lycopersicum.ITAG4.0/S_lycopersicum_chromosomes.4.00.fa" + genome_index = "${params.resource_dir}/genomes/Solanum_lycopersicum.ITAG4.0/S_lycopersicum_chromosomes.4.00.fa.fai" + genome_dict = "${params.resource_dir}/genomes/Solanum_lycopersicum.ITAG4.0/S_lycopersicum_chromosomes.4.00.dict" + genome_gtf = "${params.resource_dir}/genomes/Solanum_lycopersicum.ITAG4.0/ITAG4.0_gene_models.gtf" + genome_bed = "" + star_index = "${params.resource_dir}/tools/star/2.7.3a/Solanum_lycopersicum.ITAG4.0/" + } + "UMCU_CholrocebusSabaeus_1.1" { + genome_fasta = "${params.resource_dir}/genomes/Chlorocebus_sabaeus/Chlorocebus_sabaeus.ChlSab1.1.dna.toplevel.fa" + genome_index = "${params.resource_dir}/genomes/Chlorocebus_sabaeus/Chlorocebus_sabaeus.ChlSab1.1.dna.toplevel.fa.fai" + genome_dict = "${params.resource_dir}/genomes/Chlorocebus_sabaeus/Chlorocebus_sabaeus.ChlSab1.1.dna.toplevel.dict" + genome_gtf = "${params.resource_dir}/genomes/Chlorocebus_sabaeus/Chlorocebus_sabaeus.ChlSab1.1.103.gtf" + genome_bed = "" + star_index = "${params.resource_dir}/tools/star/2.7.3a/Chlorocebus_sabaeus" + } + "LK_LSativa_v8" { + genome_fasta = "/hpc/uu_lk/GENOMES/LGR_CoGe/Lactuca_sativa_cv_Salinas_V8/sequence/Lactuca_sativa_lettuce.fa" + genome_dict = "/hpc/uu_lk/GENOMES/LGR_CoGe/Lactuca_sativa_cv_Salinas_V8/sequence/Lactuca_sativa_lettuce.dict" + genome_index = "/hpc/uu_lk/GENOMES/LGR_CoGe/Lactuca_sativa_cv_Salinas_V8/sequence/Lactuca_sativa_lettuce.fa.fai" + genome_gtf = "/hpc/uu_lk/GENOMES/LGR_CoGe/Lactuca_sativa_cv_Salinas_V8/annotation/Lactuca_sativa_lettuce_rename.gtf" + genome_bed = "/hpc/uu_lk/GENOMES/LGR_CoGe/Lactuca_sativa_cv_Salinas_V8/annotation/Lactuca_sativa_lettuce_rename.sorted.bed12" + transcripts_fasta = "/hpc/uu_lk/GENOMES/LGR_CoGe/Lactuca_sativa_cv_Salinas_V8/sequence/transcripts/RSEM_Lsv8.transcripts.fa" + star_index = "/hpc/uu_lk/GENOMES/LGR_CoGe/Lactuca_sativa_cv_Salinas_V8/STAR/Lactuca_sativa_lettuce" + salmon_index = 
"/hpc/uu_lk/GENOMES/LGR_CoGe/Lactuca_sativa_cv_Salinas_V8/Salmon/RSEM_Lsv8.transcripts" + } + } +} + + \ No newline at end of file From e073f87b2a226a97e40cd7e55d8f29442fa7b07e Mon Sep 17 00:00:00 2001 From: Flip Mulder <47565214+ffmmulder@users.noreply.github.com> Date: Sun, 4 Jul 2021 09:22:09 +0200 Subject: [PATCH 2/4] Adjusted example configs Modified sample configs for GRCh37 using common resource configs --- confs/UMCU_GRCh37_PE.config | 50 +++++++++++++++++++++++++++++++++++++ confs/UMCU_GRCh37_SE.config | 50 +++++++++++++++++++++++++++++++++++++ 2 files changed, 100 insertions(+) create mode 100644 confs/UMCU_GRCh37_PE.config create mode 100644 confs/UMCU_GRCh37_SE.config diff --git a/confs/UMCU_GRCh37_PE.config b/confs/UMCU_GRCh37_PE.config new file mode 100644 index 0000000..6dde92d --- /dev/null +++ b/confs/UMCU_GRCh37_PE.config @@ -0,0 +1,50 @@ +includeConfig '../conf/nextflow.config' +includeConfig '../conf/process.config' +includeConfig '../conf/resources.config' + +params { + + genome = 'GRCh37' + + /*========================== + workflow steps + /*==========================*/ + + // Pre-alignment processing. 
+ runTrimGalore = true + runSortMeRNA = true + + // Alignment - STAR - Sambamba markdup + runMapping = true + + // Counting - SubRead / FeatureCounts + runFeatureCounts = true + + // Salmon + runSalmon = false + + // QC + runFastQC = true + runMultiQC = true + runPostQC = true + runRSeQC_TIN = true + + // GATK (v4) + runGermlineCallingGATK = false + runGATK4_BQSR = true + /*========================== + Settings + /*==========================*/ + + // Standard options + gencode = false + singleEnd =false + MergeFQ = true + help= false + customQC = true + + // Library strandedness + stranded = true + revstranded = false + unstranded = false +} \ No newline at end of file diff --git a/confs/UMCU_GRCh37_SE.config b/confs/UMCU_GRCh37_SE.config new file mode 100644 index 0000000..61a1b9e --- /dev/null +++ b/confs/UMCU_GRCh37_SE.config @@ -0,0 +1,50 @@ +includeConfig '../conf/nextflow.config' +includeConfig '../conf/process.config' +includeConfig '../conf/resources.config' + +params { + + genome = 'GRCh37' + + /*========================== + workflow steps + /*==========================*/ + + // Pre-alignment processing. 
+ runTrimGalore = true + runSortMeRNA = true + + // Alignment - STAR - Sambamba markdup + runMapping = true + + // Counting - SubRead / FeatureCounts + runFeatureCounts = true + + // Salmon + runSalmon = false + + // QC + runFastQC = true + runMultiQC = true + runPostQC = true + runRSeQC_TIN = true + + // GATK (v4) + runGermlineCallingGATK = false + runGATK4_BQSR = true + /*========================== + Settings + /*==========================*/ + + // Standard options + gencode = false + singleEnd = true + MergeFQ = true + help= false + customQC = true + + // Library strandedness + stranded = true + revstranded = false + unstranded = false +} From 52ed28d2bd5ea43daddc5d6f5b4c8807c98183a0 Mon Sep 17 00:00:00 2001 From: Flip Mulder <47565214+ffmmulder@users.noreply.github.com> Date: Sun, 4 Jul 2021 09:23:08 +0200 Subject: [PATCH 3/4] Delete UMCU_hg19_PE.config --- confs/UMCU_hg19_PE.config | 437 -------------------------------------- 1 file changed, 437 deletions(-) delete mode 100755 confs/UMCU_hg19_PE.config diff --git a/confs/UMCU_hg19_PE.config b/confs/UMCU_hg19_PE.config deleted file mode 100755 index 5497cdd..0000000 --- a/confs/UMCU_hg19_PE.config +++ /dev/null @@ -1,437 +0,0 @@ -manifest { - homePage = 'https://github.com/UMCUGenetics/RNASeq-NF' - description = 'RNASeq-NF is an NGS analysis pipeline for RNA expression quantification' - mainScript = 'main.nf' - version = '1.0.4' - nextflowVersion = '20.04.1' -} - -params { - - /*==========================ƒ - Reference settings - /*==========================*/ - genome_fasta = '/hpc/cog_bioinf/GENOMES/Homo_sapiens.GRCh37.GATK.illumina/Homo_sapiens.GRCh37.GATK.illumina.fa' - genome_gtf = '/hpc/cog_bioinf/GENOMES/STAR/Homo_sapiens.GRCh37/Homo_sapiens.GRCh37.74.gtf' - genome_bed = '/hpc/cog_bioinf/ubec/tools/RSeQC/Homo_sapiens.GRCh37.74.bed12' - genome_dict = '/hpc/cog_bioinf/GENOMES/Homo_sapiens.GRCh37.GATK.illumina/Homo_sapiens.GRCh37.GATK.illumina.dict' - genome_index = 
'/hpc/cog_bioinf/GENOMES/Homo_sapiens.GRCh37.GATK.illumina/Homo_sapiens.GRCh37.GATK.illumina.fa.fai' - genome_known_sites = ['/hpc/cog_bioinf/common_dbs/GATK_bundle/1000G_phase1.indels.b37.vcf', - '/hpc/cog_bioinf/common_dbs/GATK_bundle/dbsnp_137.b37.vcf', - '/hpc/cog_bioinf/common_dbs/GATK_bundle/Mills_and_1000G_gold_standard.indels.b37.vcf'] - scatter_interval_list = '/hpc/cog_bioinf/ubec/tools/Homo_sapiens.GRCh37.GATK.illumina.chromosomes.interval_list' - star_index = '/hpc/cog_bioinf/GENOMES/STAR/2.7.3a/Homo_sapiens.GRCh37.GATK.illumina' - salmon_index = '/hpc/cog_bioinf/GENOMES/Salmon/1.2.1/Homo_sapiens.GRCh37.75.cdna.all' - transcripts_fasta = '/hpc/cog_bioinf/GENOMES/RSEM/GRCh37/GRCh37.transcripts.fa' - - - /*==========================ƒ - workflow steps - /*==========================*/ - - // Pre-alignment processing. - runTrimGalore = true - runSortMeRNA = true - - // Alignment - STAR - Sambamba markdup - runMapping = true - - // Counting - SubRead / FeatureCounts - runFeatureCounts = true - - // Salmon - runSalmon = false - - // QC - runFastQC = true - runMultiQC = true - runPostQC = true - runRSeQC_TIN = true - - // GATK (v4) - runGermlineCallingGATK = false - runGATK4_BQSR = true - /*========================== - Settings - /*==========================*/ - - // Standard options - gencode = false - singleEnd =false - MergeFQ = true - help= false - customQC = true - - // CustomQC template - rmd_template = "$baseDir/bin/RNASeqNF_QC.Rmd" - - // Standard references - rRNA_database_manifest = "$baseDir/assets/sortmerna-db-default.txt" - - // Library strandedness - stranded = false - revstranded = true - unstranded = false - - // Custom settings of tools. 
- options.FastQC = '' - options.TrimGalore = '--fastqc' - options.SortMeRNA = '' - options.STAR = '--outReadsUnmapped Fastx' - options.FeatureCounts = '' - options.Salmon_quant = '--seqBias --useVBOpt --gcBias' - options.Salmon_index = '' - options.Salmon_quantmerge = '' - options.GATK4_SplitIntervals = '-SCATTER_COUNT 100 --UNIQUE true -M BALANCING_WITHOUT_INTERVAL_SUBDIVISION_WITH_OVERFLOW' - options.GATK4_HaplotypeCaller = '--standard-min-confidence-threshold-for-calling 20.0 --dont-use-soft-clipped-bases' - options.GATK4_VariantFiltration = '--cluster-window-size 35 --cluster-size 3' - options.GATK4_BQRS = '' - options.Sambamba_MergeBams = '' - options.Preseq = "-v -B -D" - options.MultiQC = '' - - //featureCounts - fc_group_features = 'gene_id' - fc_count_type = 'exon' - fc_group_features_type = 'gene_biotype' - fc_extra_attributes = 'gene_name' - normalize_counts = true - biotypeQC = true - - // Salmon additional options - saveUnaligned = false - - //Memory settings for SGE profile - sambambamarkdup.mem = '32G' - splitintervals.mem = '12G' - baserecalibrator.mem = '9G' - gatherbaserecalibrator.mem = '6G' - applybqsr.mem = '9G' - haplotypecaller.mem = '25G' - mergevcf.mem = '14G' - variantfiltration.mem = '10G' - mergebams.mem = '5G' - multiqc.mem = '4G' - -} - -process { - errorStrategy = 'retry' - maxRetries = 1 - withLabel : STAR_2_7_3a_AlignReads { - time = '24h' - penv = 'threaded' - cpus = 4 - memory = '35G' - publishDir.path = "${params.out_dir}/STAR" - publishDir.mode = 'copy' - publishDir.saveAs = {filename -> if (filename.indexOf("_Log.out") > 0) "logs/$filename" - else if (filename.indexOf("_Log.final.out") > 0) "logs/$filename" - else if (filename.indexOf(".bam") > 0) "mapped/$filename" - else if (filename.indexOf("_SJ.out.tab") > 0) "SJ/$filename" - else if (filename.indexOf(".mate1") > 0) "unmapped/$filename" - else if (filename.indexOf(".mate2") > 0) "unmapped/$filename" } - - } - withLabel : Sambamba_0_7_0_Index { - time = '24h' - penv = 
'threaded' - cpus = 2 - memory = '10G' - publishDir.path = "${params.out_dir}/STAR" - publishDir.mode = 'copy' - publishDir.saveAs = {filename -> if (filename.indexOf(".bai") > 0) "mapped/$filename" } - } - withLabel : Sambamba_0_7_0_Flagstat { - time = '24h' - penv = 'threaded' - cpus = 2 - memory = '5G' - publishDir.path = "${params.out_dir}/Sambamba/Flagstats" - publishDir.mode = 'copy' - - } - withLabel: RSeQC_3_0_1 { - time = '24h' - penv = 'threaded' - cpus = 2 - memory = '25G' - publishDir.path = "${params.out_dir}/QC/RSeQC" - publishDir.mode = 'copy' - publishDir.saveAs = {filename -> - if (filename.indexOf("bam_stat.txt") > 0) "bam_stat/$filename" - else if (filename.indexOf("infer_experiment.txt") > 0) "infer_experiment/$filename" - else if (filename.indexOf("read_distribution.txt") > 0) "read_distribution/$filename" - else if (filename.indexOf("read_duplication.DupRate_plot.pdf") > 0) "read_duplication/$filename" - else if (filename.indexOf("read_duplication.DupRate_plot.r") > 0) "read_duplication/rscripts/$filename" - else if (filename.indexOf("read_duplication.pos.DupRate.xls") > 0) "read_duplication/dup_pos/$filename" - else if (filename.indexOf("read_duplication.seq.DupRate.xls") > 0) "read_duplication/dup_seq/$filename" - else if (filename.indexOf("RPKM_saturation.eRPKM.xls") > 0) "RPKM_saturation/rpkm/$filename" - else if (filename.indexOf("RPKM_saturation.rawCount.xls") > 0) "RPKM_saturation/counts/$filename" - else if (filename.indexOf("RPKM_saturation.saturation.pdf") > 0) "RPKM_saturation/$filename" - else if (filename.indexOf("RPKM_saturation.saturation.r") > 0) "RPKM_saturation/rscripts/$filename" - else if (filename.indexOf("inner_distance.txt") > 0) "inner_distance/$filename" - else if (filename.indexOf("inner_distance_freq.txt") > 0) "inner_distance/data/$filename" - else if (filename.indexOf("inner_distance_plot.r") > 0) "inner_distance/rscripts/$filename" - else if (filename.indexOf("inner_distance_plot.pdf") > 0) 
"inner_distance/plots/$filename" - else if (filename.indexOf("junction_plot.r") > 0) "junction_annotation/rscripts/$filename" - else if (filename.indexOf("junction.xls") > 0) "junction_annotation/data/$filename" - else if (filename.indexOf(".junction_annotation_log.txt") > 0) "junction_annotation/log/$filename" - else if (filename.indexOf("splice_events.pdf") > 0) "junction_annotation/events/$filename" - else if (filename.indexOf("splice_junction.pdf") > 0) "junction_annotation/junctions/$filename" - else if (filename.indexOf("junctionSaturation_plot.pdf") > 0) "junction_saturation/$filename" - else if (filename.indexOf("junctionSaturation_plot.r") > 0) "junction_saturation/rscripts/$filename" - else filename } - } - withLabel: RSeQC_3_0_1_TIN { - time = '24h' - penv = 'threaded' - cpus = 2 - memory = '35G' - publishDir.path = "${params.out_dir}/QC/RSeQC/TIN" - publishDir.mode = 'copy' - } - withLabel: Preseq_2_0_3 { - time = '24h' - penv = 'threaded' - cpus = 2 - memory = '15G' - publishDir.path = "${params.out_dir}/QC/Preseq" - publishDir.mode = 'copy' - publishDir.saveAs = {filename -> if (filename.indexOf(".ccurve.txt") > 0) "complexity_curve/$filename" } - } - withLabel : Sambamba_0_7_0_Markdup { - time = '24h' - penv = 'threaded' - cpus = 6 - memory = '32G' - //publishDir.path = "${params.out_dir}/Sambamba/Markdup" - //publishDir.mode = 'copy' - } - withLabel : GATK_4_1_3_0_SplitNCigarReads { - time = '24h' - penv = 'threaded' - cpus = 2 - memory = '12G' - //publishDir.path = "${params.out_dir}/GATK/bam" - } - withLabel : Salmon_1_2_1_Quant { - time = '24h' - penv = 'threaded' - cpus = 2 - memory = '25G' - publishDir.path = "${params.out_dir}/Salmon" - publishDir.mode = 'copy' - } - withLabel : Salmon_1_2_1_QuantMerge { - time = '24h' - penv = 'threaded' - cpus = 2 - memory = '5G' - publishDir.path = "${params.out_dir}/Salmon/merged" - publishDir.mode = 'copy' - } - withLabel : Salmon_1_2_1_Index { - time = '24h' - penv = 'threaded' - cpus = 2 - memory = 
'25G' - publishDir.path = "${params.out_dir}/reference_resources/Salmon" - publishDir.mode = 'copy' - } - withLabel: GATK_4_1_3_0_SplitIntervals { - memory = '12G' - time = '1h' - } - withLabel : GATK_4_1_3_0_BaseRecalibration { - memory = '9G' - time = '1h' - } - withLabel : GATK_4_1_3_0_GatherBaseRecalibrationTables { - memory = '6G' - time = '1h' - } - withLabel : GATK_4_1_3_0_BaseRecalibrationTable { - memory = '9G' - time = '1h' - } - withLabel : GATK_4_1_3_0_HaplotypeCaller { - memory = '25G' - time = '12h' - } - withLabel : GATK_4_1_3_0_MergeVCFs { - memory = '14G' - time = '4h' - publishDir.path = "${params.out_dir}/GATK/vcf/raw" - publishDir.mode = 'copy' - } - withLabel : GATK_4_1_3_0_VariantFiltration { - memory = '10G' - time = '4h' - publishDir.path = "${params.out_dir}/GATK/vcf/filtered" - publishDir.mode = 'copy' - } - withLabel : Sambamba_0_7_0_Merge { - cpus = 10 - penv = 'threaded' - memory = '5G' - time = '2h' - //publishDir.path = "${params.out_dir}/GATK/bam" - } - withLabel : MultiQC_1_9 { - memory = '4G' - time = '1h' - publishDir.path = "${params.out_dir}/report/MultiQC" - publishDir.mode = 'copy' - } - withLabel : rnaseqnfqc_1_0_0 { - memory = '4G' - time = '1h' - publishDir.path = "${params.out_dir}/report/Custom_QC" - publishDir.mode = 'copy' - } - withLabel : biconductor_3_20_7_edger_normalize { - memory = '5G' - time = '1h' - publishDir.path = "${params.out_dir}/featureCounts" - publishDir.mode = 'copy' - } - withLabel : biconductor_1_28_0_deseq2normalize { - memory = '5G' - time = '1h' - publishDir.path = "${params.out_dir}/featureCounts" - publishDir.mode = 'copy' - - } - withLabel : Subread_2_0_0_FeatureCounts { - time = '24h' - penv = 'threaded' - cpus = 10 - memory = '25G' - publishDir.path = "${params.out_dir}/featureCounts" - publishDir.mode = 'copy' - publishDir.saveAs = {filename -> if (filename.indexOf("_biotype_") > 0) "biotypes/$filename" - else filename } - - } - withLabel : STAR_2_7_3a_GenomeGenerate { - time = '24h' - penv 
= 'threaded' - cpus = 12 - memory = '150G' - publishDir.path = "${params.out_dir}/reference_resources/STAR" - publishDir.mode = 'copy' - } - withLabel : ucsc_377_GenePredToBed { - time = '24h' - penv = 'threaded' - cpus = 2 - memory = '15G' - publishDir.path = "${params.out_dir}/reference_resources" - publishDir.mode = 'copy' - } - withLabel : ucsc_377_GtfToGenePred { - time = '24h' - penv = 'threaded' - cpus = 2 - memory = '15G' - } - withLabel : CreateIntervalList { - time = '1h' - publishDir.path = "${params.out_dir}/reference_resources" - publishDir.mode = 'copy' - } - withLabel : FASTQC_0_11_8 { - time = '1h' - publishDir.path = "${params.out_dir}/QC/FastQC/raw" - publishDir.mode = 'copy' - } - withLabel : SortMeRNA_4_2_0 { - time = '24h' - penv = 'threaded' - cpus = 4 - memory = '15G' - publishDir.path = "${params.out_dir}/QC/" - publishDir.mode = 'copy' - publishDir.saveAs = {filename -> - if (filename.indexOf("_rRNA_report.txt") > 0) "SorteMeRNA/logs/$filename" - else if (filename.indexOf("_filtered_rRNA.fastq.gz") > 0) "SorteMeRNA/rRNA-reads/$filename" - else null } - - } - withLabel : TrimGalore_0_6_5 { - time = '24h' - penv = 'threaded' - cpus = 2 - memory = '6G' - publishDir.path = "${params.out_dir}/QC/TrimGalore" - publishDir.mode = 'copy' - publishDir.saveAs = {filename -> - if (filename.indexOf("_fastqc") > 0) "FastQC/$filename" - else if (filename.indexOf("trimming_report.txt") > 0) "logs/$filename" - else null } - } -} - -executor { - $sge { - queueSize = 1000 - pollInterval = '30sec' - queueStatInterval = '5min' - } - $slurm { - queueSize = 1000 - pollInterval = '30sec' - queueStatInterval = '5min' - } -} - -profiles { - sge { - process.executor = 'sge' - process.queue = 'all.q' - process.clusterOptions = '-P compgen ' - } - - slurm { - process.executor = 'slurm' - process.clusterOptions = '--gres=tmpspace:25G' - process.queue = 'cpu' - } -} - -report { - enabled = true - file = "$params.out_dir/log/nextflow_report.html" -} - -trace { - enabled = 
true - file = "$params.out_dir/log/nextflow_trace.txt" - fields = 'task_id,hash,native_id,process,tag,name,status,exit,module,container,cpus,time,disk,memory,attempt,submit,start,complete,duration,realtime,queue,%cpu,%mem,rss,vmem,peak_rss,peak_vmem,rchar,wchar,syscr,syscw,read_bytes,write_bytes,vol_ctxt,inv_ctxt' -} - -timeline { - enabled = true - file = "$params.out_dir/log/nextflow_timeline.html" -} - -singularity { - enabled = true - autoMounts = true - runOptions = '-B /hpc -B $TMPDIR:$TMPDIR' - cacheDir = '/hpc/local/CentOS7/cog_bioinf/singularity_cache' -} - -cleanup = true - - - - - - - - - - - From f9d85ac006a2c7392458ae68fe3a92569d72f5a2 Mon Sep 17 00:00:00 2001 From: Flip Mulder <47565214+ffmmulder@users.noreply.github.com> Date: Sun, 4 Jul 2021 09:23:16 +0200 Subject: [PATCH 4/4] Delete UMCU_hg19_SE.config --- confs/UMCU_hg19_SE.config | 437 -------------------------------------- 1 file changed, 437 deletions(-) delete mode 100755 confs/UMCU_hg19_SE.config diff --git a/confs/UMCU_hg19_SE.config b/confs/UMCU_hg19_SE.config deleted file mode 100755 index c0375f3..0000000 --- a/confs/UMCU_hg19_SE.config +++ /dev/null @@ -1,437 +0,0 @@ - manifest { - homePage = 'https://github.com/UMCUGenetics/RNASeq-NF' - description = 'RNASeq-NF is an NGS analysis pipeline for RNA expression quantification' - mainScript = 'main.nf' - version = '1.0.4' - nextflowVersion = '20.04.1' -} - -params { - - /*==========================ƒ - Reference settings - /*==========================*/ - genome_fasta = '/hpc/cog_bioinf/GENOMES/Homo_sapiens.GRCh37.GATK.illumina/Homo_sapiens.GRCh37.GATK.illumina.fa' - genome_gtf = '/hpc/cog_bioinf/GENOMES/STAR/Homo_sapiens.GRCh37/Homo_sapiens.GRCh37.74.gtf' - genome_bed = '/hpc/cog_bioinf/ubec/tools/RSeQC/Homo_sapiens.GRCh37.74.bed12' - genome_dict = '/hpc/cog_bioinf/GENOMES/Homo_sapiens.GRCh37.GATK.illumina/Homo_sapiens.GRCh37.GATK.illumina.dict' - genome_index = 
'/hpc/cog_bioinf/GENOMES/Homo_sapiens.GRCh37.GATK.illumina/Homo_sapiens.GRCh37.GATK.illumina.fa.fai' - genome_known_sites = ['/hpc/cog_bioinf/common_dbs/GATK_bundle/1000G_phase1.indels.b37.vcf', - '/hpc/cog_bioinf/common_dbs/GATK_bundle/dbsnp_137.b37.vcf', - '/hpc/cog_bioinf/common_dbs/GATK_bundle/Mills_and_1000G_gold_standard.indels.b37.vcf'] - scatter_interval_list = '/hpc/cog_bioinf/ubec/tools/Homo_sapiens.GRCh37.GATK.illumina.chromosomes.interval_list' - star_index = '/hpc/cog_bioinf/GENOMES/STAR/2.7.3a/Homo_sapiens.GRCh37.GATK.illumina' - salmon_index = '/hpc/cog_bioinf/GENOMES/Salmon/1.2.1/Homo_sapiens.GRCh37.75.cdna.all' - transcripts_fasta = '/hpc/cog_bioinf/GENOMES/RSEM/GRCh37/GRCh37.transcripts.fa' - - - /*==========================ƒ - workflow steps - /*==========================*/ - - // Pre-alignment processing. - runTrimGalore = true - runSortMeRNA = true - - // Alignment - STAR - Sambamba markdup - runMapping = true - - // Counting - SubRead / FeatureCounts - runFeatureCounts = true - - // Salmon - runSalmon = false - - // QC - runFastQC = true - runMultiQC = true - runPostQC = true - runRSeQC_TIN = true - - // GATK (v4) - runGermlineCallingGATK = false - runGATK4_BQSR = true - /*========================== - Settings - /*==========================*/ - - // Standard options - gencode = false - singleEnd = true - MergeFQ = true - help= false - customQC = true - - // CustomQC template - rmd_template = "$baseDir/bin/RNASeqNF_QC.Rmd" - - // Standard references - rRNA_database_manifest = "$baseDir/assets/sortmerna-db-default.txt" - - // Library strandedness - stranded = true - revstranded = false - unstranded = false - - // Custom settings of tools. 
- options.FastQC = '' - options.TrimGalore = '--fastqc' - options.SortMeRNA = '' - options.STAR = '--outReadsUnmapped Fastx' - options.FeatureCounts = '' - options.Salmon_quant = '--seqBias --useVBOpt --gcBias' - options.Salmon_index = '' - options.Salmon_quantmerge = '' - options.GATK4_SplitIntervals = '-SCATTER_COUNT 100 --UNIQUE true -M BALANCING_WITHOUT_INTERVAL_SUBDIVISION_WITH_OVERFLOW' - options.GATK4_HaplotypeCaller = '--standard-min-confidence-threshold-for-calling 20.0 --dont-use-soft-clipped-bases' - options.GATK4_VariantFiltration = '--cluster-window-size 35 --cluster-size 3' - options.GATK4_BQRS = '' - options.Sambamba_MergeBams = '' - options.Preseq = "-v -B -D" - options.MultiQC = '' - - //featureCounts - fc_group_features = 'gene_id' - fc_count_type = 'exon' - fc_group_features_type = 'gene_biotype' - fc_extra_attributes = 'gene_name' - normalize_counts = true - biotypeQC = true - - // Salmon additional options - saveUnaligned = false - - //Memory settings for SGE profile - sambambamarkdup.mem = '32G' - splitintervals.mem = '12G' - baserecalibrator.mem = '9G' - gatherbaserecalibrator.mem = '6G' - applybqsr.mem = '9G' - haplotypecaller.mem = '25G' - mergevcf.mem = '14G' - variantfiltration.mem = '10G' - mergebams.mem = '5G' - multiqc.mem = '4G' - -} - -process { - errorStrategy = 'retry' - maxRetries = 1 - withLabel : STAR_2_7_3a_AlignReads { - time = '24h' - penv = 'threaded' - cpus = 4 - memory = '35G' - publishDir.path = "${params.out_dir}/STAR" - publishDir.mode = 'copy' - publishDir.saveAs = {filename -> if (filename.indexOf("_Log.out") > 0) "logs/$filename" - else if (filename.indexOf("_Log.final.out") > 0) "logs/$filename" - else if (filename.indexOf(".bam") > 0) "mapped/$filename" - else if (filename.indexOf("_SJ.out.tab") > 0) "SJ/$filename" - else if (filename.indexOf(".mate1") > 0) "unmapped/$filename" - else if (filename.indexOf(".mate2") > 0) "unmapped/$filename" } - - } - withLabel : Sambamba_0_7_0_Index { - time = '24h' - penv = 
'threaded' - cpus = 2 - memory = '10G' - publishDir.path = "${params.out_dir}/STAR" - publishDir.mode = 'copy' - publishDir.saveAs = {filename -> if (filename.indexOf(".bai") > 0) "mapped/$filename" } - } - withLabel : Sambamba_0_7_0_Flagstat { - time = '24h' - penv = 'threaded' - cpus = 2 - memory = '5G' - publishDir.path = "${params.out_dir}/Sambamba/Flagstats" - publishDir.mode = 'copy' - - } - withLabel: RSeQC_3_0_1 { - time = '24h' - penv = 'threaded' - cpus = 2 - memory = '25G' - publishDir.path = "${params.out_dir}/QC/RSeQC" - publishDir.mode = 'copy' - publishDir.saveAs = {filename -> - if (filename.indexOf("bam_stat.txt") > 0) "bam_stat/$filename" - else if (filename.indexOf("infer_experiment.txt") > 0) "infer_experiment/$filename" - else if (filename.indexOf("read_distribution.txt") > 0) "read_distribution/$filename" - else if (filename.indexOf("read_duplication.DupRate_plot.pdf") > 0) "read_duplication/$filename" - else if (filename.indexOf("read_duplication.DupRate_plot.r") > 0) "read_duplication/rscripts/$filename" - else if (filename.indexOf("read_duplication.pos.DupRate.xls") > 0) "read_duplication/dup_pos/$filename" - else if (filename.indexOf("read_duplication.seq.DupRate.xls") > 0) "read_duplication/dup_seq/$filename" - else if (filename.indexOf("RPKM_saturation.eRPKM.xls") > 0) "RPKM_saturation/rpkm/$filename" - else if (filename.indexOf("RPKM_saturation.rawCount.xls") > 0) "RPKM_saturation/counts/$filename" - else if (filename.indexOf("RPKM_saturation.saturation.pdf") > 0) "RPKM_saturation/$filename" - else if (filename.indexOf("RPKM_saturation.saturation.r") > 0) "RPKM_saturation/rscripts/$filename" - else if (filename.indexOf("inner_distance.txt") > 0) "inner_distance/$filename" - else if (filename.indexOf("inner_distance_freq.txt") > 0) "inner_distance/data/$filename" - else if (filename.indexOf("inner_distance_plot.r") > 0) "inner_distance/rscripts/$filename" - else if (filename.indexOf("inner_distance_plot.pdf") > 0) 
"inner_distance/plots/$filename" - else if (filename.indexOf("junction_plot.r") > 0) "junction_annotation/rscripts/$filename" - else if (filename.indexOf("junction.xls") > 0) "junction_annotation/data/$filename" - else if (filename.indexOf(".junction_annotation_log.txt") > 0) "junction_annotation/log/$filename" - else if (filename.indexOf("splice_events.pdf") > 0) "junction_annotation/events/$filename" - else if (filename.indexOf("splice_junction.pdf") > 0) "junction_annotation/junctions/$filename" - else if (filename.indexOf("junctionSaturation_plot.pdf") > 0) "junction_saturation/$filename" - else if (filename.indexOf("junctionSaturation_plot.r") > 0) "junction_saturation/rscripts/$filename" - else filename } - } - withLabel: RSeQC_3_0_1_TIN { - time = '24h' - penv = 'threaded' - cpus = 2 - memory = '35G' - publishDir.path = "${params.out_dir}/QC/RSeQC/TIN" - publishDir.mode = 'copy' - } - withLabel: Preseq_2_0_3 { - time = '24h' - penv = 'threaded' - cpus = 2 - memory = '15G' - publishDir.path = "${params.out_dir}/QC/Preseq" - publishDir.mode = 'copy' - publishDir.saveAs = {filename -> if (filename.indexOf(".ccurve.txt") > 0) "complexity_curve/$filename" } - } - withLabel : Sambamba_0_7_0_Markdup { - time = '24h' - penv = 'threaded' - cpus = 6 - memory = '32G' - //publishDir.path = "${params.out_dir}/Sambamba/Markdup" - //publishDir.mode = 'copy' - } - withLabel : GATK_4_1_3_0_SplitNCigarReads { - time = '24h' - penv = 'threaded' - cpus = 2 - memory = '12G' - //publishDir.path = "${params.out_dir}/GATK/bam" - } - withLabel : Salmon_1_2_1_Quant { - time = '24h' - penv = 'threaded' - cpus = 2 - memory = '25G' - publishDir.path = "${params.out_dir}/Salmon" - publishDir.mode = 'copy' - } - withLabel : Salmon_1_2_1_QuantMerge { - time = '24h' - penv = 'threaded' - cpus = 2 - memory = '5G' - publishDir.path = "${params.out_dir}/Salmon/merged" - publishDir.mode = 'copy' - } - withLabel : Salmon_1_2_1_Index { - time = '24h' - penv = 'threaded' - cpus = 2 - memory = 
'25G' - publishDir.path = "${params.out_dir}/reference_resources/Salmon" - publishDir.mode = 'copy' - } - withLabel: GATK_4_1_3_0_SplitIntervals { - memory = '12G' - time = '1h' - } - withLabel : GATK_4_1_3_0_BaseRecalibration { - memory = '9G' - time = '1h' - } - withLabel : GATK_4_1_3_0_GatherBaseRecalibrationTables { - memory = '6G' - time = '1h' - } - withLabel : GATK_4_1_3_0_BaseRecalibrationTable { - memory = '9G' - time = '1h' - } - withLabel : GATK_4_1_3_0_HaplotypeCaller { - memory = '25G' - time = '12h' - } - withLabel : GATK_4_1_3_0_MergeVCFs { - memory = '14G' - time = '4h' - publishDir.path = "${params.out_dir}/GATK/vcf/raw" - publishDir.mode = 'copy' - } - withLabel : GATK_4_1_3_0_VariantFiltration { - memory = '10G' - time = '4h' - publishDir.path = "${params.out_dir}/GATK/vcf/filtered" - publishDir.mode = 'copy' - } - withLabel : Sambamba_0_7_0_Merge { - cpus = 10 - penv = 'threaded' - memory = '5G' - time = '2h' - //publishDir.path = "${params.out_dir}/GATK/bam" - } - withLabel : MultiQC_1_9 { - memory = '4G' - time = '1h' - publishDir.path = "${params.out_dir}/report/MultiQC" - publishDir.mode = 'copy' - } - withLabel : rnaseqnfqc_1_0_0 { - memory = '4G' - time = '1h' - publishDir.path = "${params.out_dir}/report/Custom_QC" - publishDir.mode = 'copy' - } - withLabel : biconductor_3_20_7_edger_normalize { - memory = '5G' - time = '1h' - publishDir.path = "${params.out_dir}/featureCounts" - publishDir.mode = 'copy' - } - withLabel : biconductor_1_28_0_deseq2normalize { - memory = '5G' - time = '1h' - publishDir.path = "${params.out_dir}/featureCounts" - publishDir.mode = 'copy' - - } - withLabel : Subread_2_0_0_FeatureCounts { - time = '24h' - penv = 'threaded' - cpus = 10 - memory = '25G' - publishDir.path = "${params.out_dir}/featureCounts" - publishDir.mode = 'copy' - publishDir.saveAs = {filename -> if (filename.indexOf("_biotype_") > 0) "biotypes/$filename" - else filename } - - } - withLabel : STAR_2_7_3a_GenomeGenerate { - time = '24h' - penv 
= 'threaded' - cpus = 12 - memory = '150G' - publishDir.path = "${params.out_dir}/reference_resources/STAR" - publishDir.mode = 'copy' - } - withLabel : ucsc_377_GenePredToBed { - time = '24h' - penv = 'threaded' - cpus = 2 - memory = '15G' - publishDir.path = "${params.out_dir}/reference_resources" - publishDir.mode = 'copy' - } - withLabel : ucsc_377_GtfToGenePred { - time = '24h' - penv = 'threaded' - cpus = 2 - memory = '15G' - } - withLabel : CreateIntervalList { - time = '1h' - publishDir.path = "${params.out_dir}/reference_resources" - publishDir.mode = 'copy' - } - withLabel : FASTQC_0_11_8 { - time = '1h' - publishDir.path = "${params.out_dir}/QC/FastQC/raw" - publishDir.mode = 'copy' - } - withLabel : SortMeRNA_4_2_0 { - time = '24h' - penv = 'threaded' - cpus = 4 - memory = '15G' - publishDir.path = "${params.out_dir}/QC/" - publishDir.mode = 'copy' - publishDir.saveAs = {filename -> - if (filename.indexOf("_rRNA_report.txt") > 0) "SorteMeRNA/logs/$filename" - else if (filename.indexOf("_filtered_rRNA.fastq.gz") > 0) "SorteMeRNA/rRNA-reads/$filename" - else null } - - } - withLabel : TrimGalore_0_6_5 { - time = '24h' - penv = 'threaded' - cpus = 2 - memory = '6G' - publishDir.path = "${params.out_dir}/QC/TrimGalore" - publishDir.mode = 'copy' - publishDir.saveAs = {filename -> - if (filename.indexOf("_fastqc") > 0) "FastQC/$filename" - else if (filename.indexOf("trimming_report.txt") > 0) "logs/$filename" - else null } - } -} - -executor { - $sge { - queueSize = 1000 - pollInterval = '30sec' - queueStatInterval = '5min' - } - $slurm { - queueSize = 1000 - pollInterval = '30sec' - queueStatInterval = '5min' - } -} - -profiles { - sge { - process.executor = 'sge' - process.queue = 'all.q' - process.clusterOptions = '-P compgen ' - } - - slurm { - process.executor = 'slurm' - process.clusterOptions = '--gres=tmpspace:25G' - process.queue = 'cpu' - } -} - -report { - enabled = true - file = "$params.out_dir/log/nextflow_report.html" -} - -trace { - enabled = 
true - file = "$params.out_dir/log/nextflow_trace.txt" - fields = 'task_id,hash,native_id,process,tag,name,status,exit,module,container,cpus,time,disk,memory,attempt,submit,start,complete,duration,realtime,queue,%cpu,%mem,rss,vmem,peak_rss,peak_vmem,rchar,wchar,syscr,syscw,read_bytes,write_bytes,vol_ctxt,inv_ctxt' -} - -timeline { - enabled = true - file = "$params.out_dir/log/nextflow_timeline.html" -} - -singularity { - enabled = true - autoMounts = true - runOptions = '-B /hpc -B $TMPDIR:$TMPDIR' - cacheDir = '/hpc/local/CentOS7/cog_bioinf/singularity_cache' -} - -cleanup = true - - - - - - - - - - -