Skip to content

Commit

Permalink
address #1 - creates index for large genomes where output contains SQ…
Browse files Browse the repository at this point in the history
… fields
  • Loading branch information
KristinaGagalova committed Jan 13, 2024
1 parent a56c036 commit d2be179
Show file tree
Hide file tree
Showing 4 changed files with 66 additions and 4 deletions.
39 changes: 36 additions & 3 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,8 @@ switch (workflow_input) {
reads = Channel.fromPath("${reads}", checkIfExists: true)
.map {[ it.simpleName, it ]}
break;
case ["genome-mapping"]:
include { minimap_mapping; minimap_create_index } from './modules/module_reads_mapping.nf'
case ["genome-mapping", "genome-mapping-large"]:
include { minimap_mapping; minimap_mapping_large; minimap_create_index } from './modules/module_reads_mapping.nf'
include { stats_mapping ; run_multiqc_stats } from './modules/module_mapping_stats.nf'
include { run_feature_counts } from './modules/module_reads_counts.nf'
genome = file(params.genome_nuc)
Expand Down Expand Up @@ -152,6 +152,34 @@ workflow GENOME_MAPPING {
run_multiqc_stats(stats)
}

/*
 * GENOME_MAPPING_LARGE
 *
 * Genome-mapping sub-workflow that aligns reads with minimap_mapping_large
 * instead of minimap_mapping, then runs feature counting and mapping QC.
 *
 * take:
 *   genome - reference passed to minimap_create_index and run_feature_counts
 *   genes  - annotation passed to run_feature_counts
 *   reads  - channel of reads items passed to minimap_mapping_large
 */
workflow GENOME_MAPPING_LARGE {
    take:
    genome
    genes
    reads

    main:
    // Build the reference index once.
    built_index = minimap_create_index(genome)

    // collect() gathers the index output into a single item so the same
    // index is paired with every entry of the reads channel.
    index_files = built_index.minimap_index.collect()

    // Align the reads against the index.
    alignments = minimap_mapping_large(index_files, reads)

    // Per-gene counts from the alignments.
    run_feature_counts(alignments.minimap_align, genome, genes)

    // Mapping statistics, then one aggregated MultiQC run over all of them.
    mapping_stats = stats_mapping(alignments.minimap_align)

    // Keep only the second element of each emitted stats item
    // (presumably the stats file - confirm against stats_mapping's output).
    stats_files = mapping_stats.stats
        .map { entry -> entry[1] }
        .collect()

    run_multiqc_stats(stats_files)
}

workflow {

switchVariable = 0
Expand All @@ -166,7 +194,9 @@ workflow {
switchVariable = 4;
} else if (workflow_input == "genome-mapping") {
switchVariable = 5;
}
} else if (workflow_input == "genome-mapping-large") {
switchVariable = 6;
}

switch (switchVariable) {
case 1:
Expand All @@ -184,6 +214,9 @@ workflow {
case 5:
GENOME_MAPPING(genome, genes, reads);
break;
case 6:
GENOME_MAPPING_LARGE(genome, genes, reads);
break;
default:
println("Please provide the correct input options")
break;
Expand Down
28 changes: 28 additions & 0 deletions modules/module_reads_mapping.nf
Original file line number Diff line number Diff line change
Expand Up @@ -76,3 +76,31 @@ process minimap_mapping {
${reads} | samtools sort -o ${sample}.bam
"""
}

process minimap_mapping_large {
    /*
     * Map reads with minimap2 in spliced mode for large genomes (the original
     * note gives ~4 Gb as the size in question) and write a sorted BAM.
     *
     * --split-prefix makes minimap2 write temporary files under the given
     * prefix so that alignments against a multi-part index are merged into a
     * single output.
     *
     * input:
     *   genome          - reference handed to minimap2; in GENOME_MAPPING_LARGE
     *                     this is the output of minimap_create_index
     *   sample, reads   - sample identifier and its reads file
     *
     * output:
     *   minimap_align   - tuple (sample, "<sample>.bam")
     */
    time '1d'
    label 'big_task'
    tag "minimap mapping: ${sample}"

    publishDir "${outdir}/alignements/${workflow}", mode: 'copy'

    input:
    path(genome)
    tuple val(sample), path(reads)

    output:
    tuple val(sample), path("${sample}.bam"), emit: minimap_align

    script:
    // --secondary=no needs the double dash: with a single dash minimap2 parses
    // it as the short option -s with argument "econdary=no".
    // The split prefix is per-sample and relative to the task work directory so
    // that tasks running in parallel never share temporary files.
    """
    minimap2 \
        -ax splice \
        --secondary=no \
        --split-prefix ${sample}.split_tmp \
        -t ${task.cpus} \
        ${genome} \
        ${reads} | samtools sort -o ${sample}.bam
    """
}
1 change: 1 addition & 0 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
"reads-filter",
"chloroplast-contamination",
"genome-mapping",
"genome-mapping-large",
"isoform-analysis"
]
},
Expand Down
2 changes: 1 addition & 1 deletion runme.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ nextflow run ./main.nf \
-profile local,singularity \
-resume \
--input_dir "results/barcodes_concat/barcode*" \
--workflow "genome-mapping" \
--workflow "genome-mapping-large" \
--output_dir "results" \
--genome_nuc "/home/kgagalova/Hordeum_vulgare.MorexV3_pseudomolecules_assembly.dna.toplevel_nams.fa" \
--genes "/home/kgagalova/Hv_Morex.pgsb.Jul2020.gtf" \
Expand Down

0 comments on commit d2be179

Please sign in to comment.