diff --git a/CHANGELOG.md b/CHANGELOG.md index 9b80d04b..325ce84b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +<<<<<<< HEAD ## 0.4.0 - [2024-11-22] ### `Added` @@ -193,12 +194,19 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 > Version has been removed if new version information isn't present. ## 0.3.2 - [2024-09-20] +======= +## v0.3.2 - [2024-09-20] +>>>>>>> master ### `Fixed` - [#396](https://github.com/genomic-medicine-sweden/nallo/pull/396) - Fixed the release test profile not working, by pinning the testdata used [#395](https://github.com/genomic-medicine-sweden/nallo/issues/395) +<<<<<<< HEAD ## 0.3.1 - [2024-09-11] +======= +## v0.3.1 - [2024-09-11] +>>>>>>> master ### `Fixed` diff --git a/nextflow_schema.json b/nextflow_schema.json index f283d38f..7f7c79ac 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -522,6 +522,147 @@ } }, "required": ["preset"] +<<<<<<< HEAD +======= + }, + "file_inputs": { + "title": "File inputs", + "type": "object", + "description": "The different files that are required. Some are only required by certain workflows, see the usage documentation.", + "default": "", + "fa_icon": "fas fa-copy", + "properties": { + "cadd_prescored": { + "type": "string", + "exists": true, + "format": "directory-path", + "fa_icon": "fas fa-file", + "description": "Path to a directory containing prescored indels for CADD.", + "help_text": "This folder contains the compressed files and indexes that would otherwise be in data/prescored folder as described in https://github.com/kircherlab/CADD-scripts/#manual-installation." + }, + "cadd_resources": { + "type": "string", + "exists": true, + "format": "directory-path", + "fa_icon": "fas fa-file", + "description": "Path to a directory containing CADD annotations.", + "help_text": "This folder contains the uncompressed files that would otherwise be in data/annotation folder as described in https://github.com/kircherlab/CADD-scripts/#manual-installation." + }, + "par_regions": { + "type": "string", + "description": "Provide a bed file of chrX and chrY PAR regions for dipcall and DeepVariant", + "format": "file-path", + "exists": true + }, + "tandem_repeats": { + "type": "string", + "format": "file-path", + "description": "A tandem repeat BED file for sniffles", + "pattern": "^\\S+\\.bed$", + "exists": true + }, + "trgt_repeats": { + "type": "string", + "description": "A BED file with repeats to be genotyped with TRGT", + "format": "file-path", + "exists": true + }, + "snp_db": { + "type": "string", + "pattern": "^\\S+\\.csv$", + "format": "file-path", + "mimetype": "text/csv", + "schema": "/assets/schema_snpdb.json", + "description": "A csv file with echtvar databases to annotate SNVs with", + "exists": true + }, + "variant_catalog": { + "type": "string", + "description": "A variant catalog json-file for stranger", + "format": "file-path", + "exists": true + }, + "variant_consequences_snv": { + "type": "string", + "description": "File containing list of SO terms listed in the order of severity from most severe to lease severe for annotating genomic SNVs.", + "help_text": "For more information check https://ensembl.org/info/genome/variation/prediction/predicted_data.html", + "fa_icon": "fas fa-file-csv" + }, + "vep_cache": { + "type": "string", + "description": "A path to the VEP cache location", + "format": "path", + "exists": true + }, + "bed": { + "type": "string", + "pattern": "^\\S+\\.bed$", + "format": "file-path", + "description": "A BED file with regions of interest, used to limit short variant calling.", + "exists": true + }, + "hificnv_xy": { + "type": "string", + "format": "file-path", + "description": "A BED file containing expected copy number regions for XY samples.", + "exists": true + }, + "hificnv_xx": { + "type": "string", + "format": "file-path", + "description": "A BED file containing expected copy number regions for XX samples.", + "exists": true + }, + "hificnv_exclude": { + "type": "string", + "format": "file-path", + "description": "A BED file specifying regions to exclude with HiFiCNV, such as centromeres.", + "exists": true + }, + "reduced_penetrance": { + "type": "string", + "exists": true, + "format": "path", + "fa_icon": "fas fa-file-csv", + "description": "A file with gene ids that have reduced penetrance. For use with genmod." + }, + "score_config_snv": { + "type": "string", + "exists": true, + "format": "path", + "fa_icon": "fas fa-file", + "description": "A SNV rank model config file for genmod." + }, + "somalier_sites": { + "type": "string", + "pattern": "^\\S+\\.vcf(\\.gz)?$", + "description": "A VCF of known polymorphic sites for somalier", + "format": "file-path", + "exists": true + }, + "validationFailUnrecognisedParams": { + "type": "boolean", + "fa_icon": "far fa-check-circle", + "description": "Validation of parameters fails when an unrecognised parameter is found.", + "hidden": true, + "help_text": "By default, when an unrecognised parameter is found, it returns a warning." + }, + "validationLenientMode": { + "type": "boolean", + "fa_icon": "far fa-check-circle", + "description": "Validation of parameters in lenient more.", + "hidden": true, + "help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)." + }, + "pipelines_testdata_base_path": { + "type": "string", + "fa_icon": "far fa-check-circle", + "description": "Base URL or local path to location of pipeline test dataset files", + "default": "https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/nallo-0.3.2/", + "hidden": true + } + } +>>>>>>> master } }, "allOf": [ diff --git a/tests/main.nf.test b/tests/main.nf.test new file mode 100644 index 00000000..4976e9d5 --- /dev/null +++ b/tests/main.nf.test @@ -0,0 +1,345 @@ +nextflow_pipeline { + + name "Test pipeline GENOMICMEDICINESWEDEN_NALLO" + script "../main.nf" + profile "test" + tag "PIPELINE" + + test("test profile") { + tag "samplesheet" + + when { + params { + // Base directory for genomic-medicine-sweden/nallo test data + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/ba720cd29322036d966ab3e4bc4c3d03e1731af5/' + // Test files + fasta = params.pipelines_testdata_base_path + 'reference/hg38.test.fa.gz' + input = params.pipelines_testdata_base_path + 'testdata/samplesheet.csv' + bed = params.pipelines_testdata_base_path + 'reference/test_data.bed' + hificnv_xy = params.pipelines_testdata_base_path + 'reference/expected_cn.hg38.XY.bed' + hificnv_xx = params.pipelines_testdata_base_path + 'reference/expected_cn.hg38.XX.bed' + hificnv_exclude = params.pipelines_testdata_base_path + 'reference/empty.bed' + par_regions = params.pipelines_testdata_base_path + 'reference/hs38.PAR.bed' + trgt_repeats = params.pipelines_testdata_base_path + 'reference/pathogenic_repeats.hg38.bed' + variant_catalog = params.pipelines_testdata_base_path + 'reference/variant_catalog_grch38.json' + vep_cache = params.pipelines_testdata_base_path + 'reference/vep_cache_test_data.tar.gz' + vep_plugin_files = params.pipelines_testdata_base_path + 'reference/vep_plugin_files.csv' + snp_db = params.pipelines_testdata_base_path + 'testdata/snp_dbs.csv' + somalier_sites = params.pipelines_testdata_base_path + 'reference/somalier_sites.vcf.gz' + reduced_penetrance = params.pipelines_testdata_base_path + 'reference/reduced_penetrance.tsv' + score_config_snv = params.pipelines_testdata_base_path + 'reference/rank_model_snv.ini' + variant_consequences_snv = params.pipelines_testdata_base_path + 'reference/variant_consequences_v2.txt' + + // Parameters + parallel_snv = 2 + preset = "revio" + outdir = "$outputDir" + } + } + + then { + assertAll ( + { assert workflow.success }, + // Assert with snapshot multisample + { assert snapshot( + file("$outputDir/pedigree/test.ped"), + file("$outputDir/qc_aligned_reads/somalier/relate/test/test.pairs.tsv"), + file("$outputDir/qc_aligned_reads/somalier/relate/test/test.samples.tsv"), + file("$outputDir/qc_aligned_reads/somalier/relate/test/test.html"), + file("$outputDir/multiqc/multiqc_data/multiqc_citations.txt"), + file("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"), + file("$outputDir/multiqc/multiqc_data/multiqc_somalier.txt"), + // Assert with snapshot HG002_Revio + bam("$outputDir/aligned_reads/HG002_Revio/HG002_Revio_phased.bam", stringency: 'silent').getReadsMD5(), + file("$outputDir/assembly_haplotypes/gfastats/HG002_Revio/HG002_Revio.asm.bp.hap1.p_ctg.assembly_summary"), + file("$outputDir/assembly_haplotypes/gfastats/HG002_Revio/HG002_Revio.asm.bp.hap2.p_ctg.assembly_summary"), + bam("$outputDir/assembly_variant_calling/dipcall/HG002_Revio/HG002_Revio.hap1.bam", stringency: 'silent').getReadsMD5(), + bam("$outputDir/assembly_variant_calling/dipcall/HG002_Revio/HG002_Revio.hap2.bam", stringency: 'silent').getReadsMD5(), + file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio/HG002_Revio.dip.bed"), + file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio/HG002_Revio.hap1.bed"), + file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio/HG002_Revio.hap2.bed"), + file("$outputDir/cnv_calling/hificnv/HG002_Revio/HG002_Revio.HG002_Revio.copynum.bedgraph"), + file("$outputDir/cnv_calling/hificnv/HG002_Revio/HG002_Revio.HG002_Revio.depth.bw"), + file("$outputDir/cnv_calling/hificnv/HG002_Revio/HG002_Revio.HG002_Revio.maf.bw"), + file("$outputDir/qc_aligned_reads/mosdepth/HG002_Revio/HG002_Revio.mosdepth.global.dist.txt"), + file("$outputDir/qc_aligned_reads/mosdepth/HG002_Revio/HG002_Revio.mosdepth.region.dist.txt"), + file("$outputDir/qc_aligned_reads/mosdepth/HG002_Revio/HG002_Revio.mosdepth.summary.txt"), + file("$outputDir/assembly_haplotypes/gfastats/HG002_Revio/HG002_Revio.asm.bp.hap1.p_ctg.fasta.gz"), + file("$outputDir/assembly_haplotypes/gfastats/HG002_Revio/HG002_Revio.asm.bp.hap2.p_ctg.fasta.gz"), + file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio/HG002_Revio.dip.vcf.gz"), + file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio/HG002_Revio.hap1.paf.gz"), + file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio/HG002_Revio.hap1.var.gz"), + file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio/HG002_Revio.hap2.paf.gz"), + file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio/HG002_Revio.hap2.var.gz"), + file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio/HG002_Revio.pair.vcf.gz"), + file("$outputDir/cnv_calling/hificnv/HG002_Revio/HG002_Revio.HG002_Revio.vcf.gz"), + file("$outputDir/cnv_calling/hificnv/HG002_Revio/HG002_Revio.HG002_Revio.vcf.gz.tbi"), + file("$outputDir/methylation/modkit/pileup/unphased/HG002_Revio/HG002_Revio_modkit_pileup.bed.gz"), + file("$outputDir/methylation/modkit/pileup/unphased/HG002_Revio/HG002_Revio_modkit_pileup.bed.gz.tbi"), + bam("$outputDir/paraphase/HG002_Revio/HG002_Revio.paraphase.bam", stringency: 'silent').getReadsMD5(), + file("$outputDir/qc_aligned_reads/cramino/phased/HG002_Revio/HG002_Revio_cramino_aligned_phased.arrow"), + file("$outputDir/qc_aligned_reads/cramino/unphased/HG002_Revio/HG002_Revio_cramino_aligned.arrow"), + file("$outputDir/qc_aligned_reads/mosdepth/HG002_Revio/HG002_Revio.regions.bed.gz"), + file("$outputDir/qc_aligned_reads/mosdepth/HG002_Revio/HG002_Revio.regions.bed.gz.csi"), + file("$outputDir/qc_raw_reads/fqcrs/HG002_Revio/HG002_PacBio_Revio.fastq.gz.tsv.zst"), + file("$outputDir/repeat_calling/trgt/single_sample/HG002_Revio/HG002_Revio_sorted.vcf.gz"), + file("$outputDir/repeat_calling/trgt/single_sample/HG002_Revio/HG002_Revio_sorted.vcf.gz.tbi"), + bam("$outputDir/repeat_calling/trgt/single_sample/HG002_Revio/HG002_Revio_spanning_sorted.bam", stringency: 'silent').getReadsMD5(), + file("$outputDir/snvs/stats/single_sample/HG002_Revio.vcf.gz.bcftools_stats.txt").readLines()[0..2], + ).match() }, + // Assert exists multisample + { assert new File("$outputDir/databases/echtvar/encode/test/test.zip").exists() }, + { assert new File("$outputDir/multiqc/multiqc_data/multiqc_data.json").exists() }, + { assert new File("$outputDir/multiqc/multiqc_data/multiqc.log").exists() }, + { assert new File("$outputDir/multiqc/multiqc_report.html").exists() }, + { assert new File("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt").exists() }, + { assert new File("$outputDir/multiqc/multiqc_data/multiqc_sources.txt").exists() }, + { assert new File("$outputDir/multiqc/multiqc_data/multiqc_software_versions.txt").exists() }, + { assert new File("$outputDir/repeat_calling/trgt/multi_sample/test/test.vcf.gz").exists() }, + { assert new File("$outputDir/repeat_calling/trgt/multi_sample/test/test.vcf.gz.tbi").exists() }, + { assert new File("$outputDir/snvs/multi_sample/test/test_snv_annotated_ranked.vcf.gz").exists() }, + { assert new File("$outputDir/snvs/multi_sample/test/test_snv_annotated_ranked.vcf.gz.tbi").exists() }, + { assert new File("$outputDir/sv_calling/sniffles/multi_sample/test/test_sniffles.vcf.gz").exists() }, + { assert new File("$outputDir/sv_calling/sniffles/multi_sample/test/test_sniffles.vcf.gz.tbi").exists() }, + // Assert exists HG002_Revio + { assert new File("$outputDir/aligned_reads/HG002_Revio/HG002_Revio_phased.bam.bai").exists() }, + { assert new File("$outputDir/assembly_variant_calling/dipcall/HG002_Revio/HG002_Revio.hap1.bam.bai").exists() }, + { assert new File("$outputDir/assembly_variant_calling/dipcall/HG002_Revio/HG002_Revio.hap1.sam.gz").exists() }, + { assert new File("$outputDir/assembly_variant_calling/dipcall/HG002_Revio/HG002_Revio.hap2.bam.bai").exists() }, + { assert new File("$outputDir/assembly_variant_calling/dipcall/HG002_Revio/HG002_Revio.hap2.sam.gz").exists() }, + { assert new File("$outputDir/cnv_calling/hificnv/HG002_Revio/HG002_Revio.log").exists() }, + { assert new File("$outputDir/methylation/modkit/pileup/phased/HG002_Revio/HG002_Revio_modkit_pileup_phased_1.bed.gz").exists() }, + { assert new File("$outputDir/methylation/modkit/pileup/phased/HG002_Revio/HG002_Revio_modkit_pileup_phased_1.bed.gz.tbi").exists() }, + { assert new File("$outputDir/methylation/modkit/pileup/phased/HG002_Revio/HG002_Revio_modkit_pileup_phased_2.bed.gz").exists() }, + { assert new File("$outputDir/methylation/modkit/pileup/phased/HG002_Revio/HG002_Revio_modkit_pileup_phased_2.bed.gz.tbi").exists() }, + { assert new File("$outputDir/methylation/modkit/pileup/phased/HG002_Revio/HG002_Revio_modkit_pileup_phased_ungrouped.bed.gz").exists() }, + { assert new File("$outputDir/methylation/modkit/pileup/phased/HG002_Revio/HG002_Revio_modkit_pileup_phased_ungrouped.bed.gz.tbi").exists() }, + { assert new File("$outputDir/paraphase/HG002_Revio/HG002_Revio.paraphase.bam.bai").exists() }, + { assert new File("$outputDir/paraphase/HG002_Revio/HG002_Revio_paraphase_vcfs/HG002_Revio_hba.vcf.gz").exists() }, + { assert new File("$outputDir/paraphase/HG002_Revio/HG002_Revio_paraphase_vcfs/HG002_Revio_hba.vcf.gz.tbi").exists() }, + { assert new File("$outputDir/paraphase/HG002_Revio/HG002_Revio.paraphase.json").exists() }, + { assert new File("$outputDir/qc_aligned_reads/cramino/phased/HG002_Revio/HG002_Revio_cramino_aligned_phased.txt").exists() }, + { assert new File("$outputDir/qc_aligned_reads/cramino/unphased/HG002_Revio/HG002_Revio_cramino_aligned.txt").exists() }, + { assert new File("$outputDir/phasing/whatshap/phase/HG002_Revio/HG002_Revio_phased.vcf.gz").exists() }, + { assert new File("$outputDir/phasing/whatshap/phase/HG002_Revio/HG002_Revio_phased.vcf.gz.tbi").exists() }, + { assert new File("$outputDir/phasing/whatshap/stats/HG002_Revio/HG002_Revio_stats.stats.tsv").exists() }, + { assert new File("$outputDir/phasing/whatshap/stats/HG002_Revio/HG002_Revio_stats.blocks.tsv").exists() }, + { assert new File("$outputDir/qc_raw_reads/fastqc/HG002_Revio/HG002_Revio_fastqc.html").exists() }, + { assert new File("$outputDir/qc_raw_reads/fastqc/HG002_Revio/HG002_Revio_fastqc.zip").exists() }, + { assert new File("$outputDir/repeat_annotation/stranger/HG002_Revio/HG002_Revio_repeat_expansion_stranger.vcf.gz").exists() }, + { assert new File("$outputDir/repeat_annotation/stranger/HG002_Revio/HG002_Revio_repeat_expansion_stranger.vcf.gz.tbi").exists() }, + { assert new File("$outputDir/repeat_calling/trgt/single_sample/HG002_Revio/HG002_Revio_spanning_sorted.bam.bai").exists() }, + { assert new File("$outputDir/snvs/single_sample/HG002_Revio/HG002_Revio_snv_annotated_ranked.vcf.gz").exists() }, + { assert new File("$outputDir/snvs/single_sample/HG002_Revio/HG002_Revio_snv_annotated_ranked.vcf.gz.tbi").exists() }, + { assert new File("$outputDir/sv_calling/sniffles/single_sample/HG002_Revio/HG002_Revio_sniffles.snf").exists() }, + { assert new File("$outputDir/sv_calling/sniffles/single_sample/HG002_Revio/HG002_Revio_sniffles.vcf.gz").exists() }, + { assert new File("$outputDir/sv_calling/sniffles/single_sample/HG002_Revio/HG002_Revio_sniffles.vcf.gz.tbi").exists() } + ) + } + } + + test("test profile - multisample") { + tag "samplesheet_multisample_bam" + + when { + params { + // Base directory for genomic-medicine-sweden/nallo test data + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/nallo-0.3.2/' + // Test files + fasta = params.pipelines_testdata_base_path + 'reference/hg38.test.fa.gz' + input = params.pipelines_testdata_base_path + 'testdata/samplesheet_multisample_bam.csv' + bed = params.pipelines_testdata_base_path + 'reference/test_data.bed' + hificnv_xy = params.pipelines_testdata_base_path + 'reference/expected_cn.hg38.XY.bed' + hificnv_xx = params.pipelines_testdata_base_path + 'reference/expected_cn.hg38.XX.bed' + hificnv_exclude = params.pipelines_testdata_base_path + 'reference/empty.bed' + par_regions = params.pipelines_testdata_base_path + 'reference/hs38.PAR.bed' + trgt_repeats = params.pipelines_testdata_base_path + 'reference/pathogenic_repeats.hg38.bed' + variant_catalog = params.pipelines_testdata_base_path + 'reference/variant_catalog_grch38.json' + vep_cache = params.pipelines_testdata_base_path + 'reference/vep_cache_test_data.tar.gz' + vep_plugin_files = params.pipelines_testdata_base_path + 'reference/vep_plugin_files.csv' + snp_db = params.pipelines_testdata_base_path + 'testdata/snp_dbs.csv' + somalier_sites = params.pipelines_testdata_base_path + 'reference/somalier_sites.vcf.gz' + reduced_penetrance = params.pipelines_testdata_base_path + 'reference/reduced_penetrance.tsv' + score_config_snv = params.pipelines_testdata_base_path + 'reference/rank_model_snv.ini' + variant_consequences_snv = params.pipelines_testdata_base_path + 'reference/variant_consequences_v2.txt' + + // Parameters + parallel_snv = 2 + preset = "revio" + outdir = "$outputDir" + } + } + + then { + assertAll ( + { assert workflow.success }, + // Assert with snapshot multisample + { assert snapshot( + file("$outputDir/pedigree/test.ped"), + file("$outputDir/multiqc/multiqc_data/multiqc_citations.txt"), + file("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"), + // Assert with snapshot HG002_Revio_A + bam("$outputDir/aligned_reads/HG002_Revio_A/HG002_Revio_A_phased.bam", stringency: 'silent').getReadsMD5(), + file("$outputDir/assembly_haplotypes/gfastats/HG002_Revio_A/HG002_Revio_A.asm.bp.hap1.p_ctg.assembly_summary"), + file("$outputDir/assembly_haplotypes/gfastats/HG002_Revio_A/HG002_Revio_A.asm.bp.hap2.p_ctg.assembly_summary"), + bam("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_A/HG002_Revio_A.hap1.bam", stringency: 'silent').getReadsMD5(), + bam("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_A/HG002_Revio_A.hap2.bam", stringency: 'silent').getReadsMD5(), + file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_A/HG002_Revio_A.dip.bed"), + file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_A/HG002_Revio_A.hap1.bed"), + file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_A/HG002_Revio_A.hap2.bed"), + file("$outputDir/cnv_calling/hificnv/HG002_Revio_A/HG002_Revio_A.HG002_Revio_A.copynum.bedgraph"), + file("$outputDir/cnv_calling/hificnv/HG002_Revio_A/HG002_Revio_A.HG002_Revio_A.depth.bw"), + file("$outputDir/cnv_calling/hificnv/HG002_Revio_A/HG002_Revio_A.HG002_Revio_A.maf.bw"), + file("$outputDir/qc_aligned_reads/mosdepth/HG002_Revio_A/HG002_Revio_A.mosdepth.global.dist.txt"), + file("$outputDir/qc_aligned_reads/mosdepth/HG002_Revio_A/HG002_Revio_A.mosdepth.region.dist.txt"), + file("$outputDir/qc_aligned_reads/mosdepth/HG002_Revio_A/HG002_Revio_A.mosdepth.summary.txt"), + file("$outputDir/assembly_haplotypes/gfastats/HG002_Revio_A/HG002_Revio_A.asm.bp.hap1.p_ctg.fasta.gz"), + file("$outputDir/assembly_haplotypes/gfastats/HG002_Revio_A/HG002_Revio_A.asm.bp.hap2.p_ctg.fasta.gz"), + file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_A/HG002_Revio_A.dip.vcf.gz"), + file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_A/HG002_Revio_A.hap1.paf.gz"), + file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_A/HG002_Revio_A.hap1.var.gz"), + file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_A/HG002_Revio_A.hap2.paf.gz"), + file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_A/HG002_Revio_A.hap2.var.gz"), + file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_A/HG002_Revio_A.pair.vcf.gz"), + file("$outputDir/cnv_calling/hificnv/HG002_Revio_A/HG002_Revio_A.HG002_Revio_A.vcf.gz"), + file("$outputDir/cnv_calling/hificnv/HG002_Revio_A/HG002_Revio_A.HG002_Revio_A.vcf.gz.tbi"), + file("$outputDir/methylation/modkit/pileup/unphased/HG002_Revio_A/HG002_Revio_A_modkit_pileup.bed.gz"), + file("$outputDir/methylation/modkit/pileup/unphased/HG002_Revio_A/HG002_Revio_A_modkit_pileup.bed.gz.tbi"), + bam("$outputDir/paraphase/HG002_Revio_A/HG002_Revio_A.paraphase.bam", stringency: 'silent').getReadsMD5(), + file("$outputDir/qc_aligned_reads/cramino/phased/HG002_Revio_A/HG002_Revio_A_cramino_aligned_phased.arrow"), + file("$outputDir/qc_aligned_reads/cramino/unphased/HG002_Revio_A/HG002_Revio_A_cramino_aligned.arrow"), + file("$outputDir/qc_aligned_reads/mosdepth/HG002_Revio_A/HG002_Revio_A.regions.bed.gz"), + file("$outputDir/qc_aligned_reads/mosdepth/HG002_Revio_A/HG002_Revio_A.regions.bed.gz.csi"), + file("$outputDir/qc_raw_reads/fqcrs/HG002_Revio_A/HG002_PacBio_Revio.bam_other.fastq.gz.tsv.zst"), + file("$outputDir/repeat_calling/trgt/single_sample/HG002_Revio_A/HG002_Revio_A_sorted.vcf.gz"), + file("$outputDir/repeat_calling/trgt/single_sample/HG002_Revio_A/HG002_Revio_A_sorted.vcf.gz.tbi"), + bam("$outputDir/repeat_calling/trgt/single_sample/HG002_Revio_A/HG002_Revio_A_spanning_sorted.bam", stringency: 'silent').getReadsMD5(), + file("$outputDir/snvs/stats/single_sample/HG002_Revio_A.vcf.gz.bcftools_stats.txt").readLines()[0..2], + // Assert with snapshot HG002_Revio_B + bam("$outputDir/aligned_reads/HG002_Revio_B/HG002_Revio_B_phased.bam", stringency: 'silent').getReadsMD5(), + file("$outputDir/assembly_haplotypes/gfastats/HG002_Revio_B/HG002_Revio_B.asm.bp.hap1.p_ctg.assembly_summary"), + file("$outputDir/assembly_haplotypes/gfastats/HG002_Revio_B/HG002_Revio_B.asm.bp.hap2.p_ctg.assembly_summary"), + bam("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_B/HG002_Revio_B.hap1.bam", stringency: 'silent').getReadsMD5(), + bam("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_B/HG002_Revio_B.hap2.bam", stringency: 'silent').getReadsMD5(), + file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_B/HG002_Revio_B.dip.bed"), + file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_B/HG002_Revio_B.hap1.bed"), + file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_B/HG002_Revio_B.hap2.bed"), + file("$outputDir/cnv_calling/hificnv/HG002_Revio_B/HG002_Revio_B.HG002_Revio_B.copynum.bedgraph"), + file("$outputDir/cnv_calling/hificnv/HG002_Revio_B/HG002_Revio_B.HG002_Revio_B.depth.bw"), + file("$outputDir/cnv_calling/hificnv/HG002_Revio_B/HG002_Revio_B.HG002_Revio_B.maf.bw"), + file("$outputDir/qc_aligned_reads/mosdepth/HG002_Revio_B/HG002_Revio_B.mosdepth.global.dist.txt"), + file("$outputDir/qc_aligned_reads/mosdepth/HG002_Revio_B/HG002_Revio_B.mosdepth.region.dist.txt"), + file("$outputDir/qc_aligned_reads/mosdepth/HG002_Revio_B/HG002_Revio_B.mosdepth.summary.txt"), + file("$outputDir/assembly_haplotypes/gfastats/HG002_Revio_B/HG002_Revio_B.asm.bp.hap1.p_ctg.fasta.gz"), + file("$outputDir/assembly_haplotypes/gfastats/HG002_Revio_B/HG002_Revio_B.asm.bp.hap2.p_ctg.fasta.gz"), + file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_B/HG002_Revio_B.dip.vcf.gz"), + file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_B/HG002_Revio_B.hap1.paf.gz"), + file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_B/HG002_Revio_B.hap1.var.gz"), + file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_B/HG002_Revio_B.hap2.paf.gz"), + file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_B/HG002_Revio_B.hap2.var.gz"), + file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_B/HG002_Revio_B.pair.vcf.gz"), + file("$outputDir/cnv_calling/hificnv/HG002_Revio_B/HG002_Revio_B.HG002_Revio_B.vcf.gz"), + file("$outputDir/cnv_calling/hificnv/HG002_Revio_B/HG002_Revio_B.HG002_Revio_B.vcf.gz.tbi"), + file("$outputDir/methylation/modkit/pileup/unphased/HG002_Revio_B/HG002_Revio_B_modkit_pileup.bed.gz"), + file("$outputDir/methylation/modkit/pileup/unphased/HG002_Revio_B/HG002_Revio_B_modkit_pileup.bed.gz.tbi"), + bam("$outputDir/paraphase/HG002_Revio_B/HG002_Revio_B.paraphase.bam", stringency: 'silent').getReadsMD5(), + file("$outputDir/qc_aligned_reads/cramino/phased/HG002_Revio_B/HG002_Revio_B_cramino_aligned_phased.arrow"), + file("$outputDir/qc_aligned_reads/cramino/unphased/HG002_Revio_B/HG002_Revio_B_cramino_aligned.arrow"), + file("$outputDir/qc_aligned_reads/mosdepth/HG002_Revio_B/HG002_Revio_B.regions.bed.gz"), + file("$outputDir/qc_aligned_reads/mosdepth/HG002_Revio_B/HG002_Revio_B.regions.bed.gz.csi"), + file("$outputDir/qc_raw_reads/fqcrs/HG002_Revio_B/HG002_Revio_B.merged.fastq.gz.tsv.zst"), + file("$outputDir/repeat_calling/trgt/single_sample/HG002_Revio_B/HG002_Revio_B_sorted.vcf.gz"), + file("$outputDir/repeat_calling/trgt/single_sample/HG002_Revio_B/HG002_Revio_B_sorted.vcf.gz.tbi"), + bam("$outputDir/repeat_calling/trgt/single_sample/HG002_Revio_B/HG002_Revio_B_spanning_sorted.bam", stringency: 'silent').getReadsMD5(), + file("$outputDir/snvs/stats/single_sample/HG002_Revio_B.vcf.gz.bcftools_stats.txt").readLines()[0..2], + ).match() }, + // Assert exists multisample - note the trgt multisample that doesn't exist in singlesample + { assert new File("$outputDir/databases/echtvar/encode/test/test.zip").exists() }, + { assert new File("$outputDir/qc_aligned_reads/somalier/relate/test/test.pairs.tsv").exists() }, + { assert new File("$outputDir/qc_aligned_reads/somalier/relate/test/test.samples.tsv").exists() }, + { assert new File("$outputDir/qc_aligned_reads/somalier/relate/test/test.html").exists() }, + { assert new File("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt").exists() }, + { assert new File("$outputDir/multiqc/multiqc_data/multiqc_somalier.txt").exists() }, + { assert new File("$outputDir/multiqc/multiqc_data/multiqc_data.json").exists() }, + { assert new File("$outputDir/multiqc/multiqc_data/multiqc.log").exists() }, + { assert new File("$outputDir/multiqc/multiqc_report.html").exists() }, + { assert new File("$outputDir/multiqc/multiqc_data/multiqc_sources.txt").exists() }, + { assert new File("$outputDir/multiqc/multiqc_data/multiqc_software_versions.txt").exists() }, + { assert new File("$outputDir/snvs/multi_sample/test/test_snv_annotated_ranked.vcf.gz").exists() }, + { assert new File("$outputDir/snvs/multi_sample/test/test_snv_annotated_ranked.vcf.gz.tbi").exists() }, + { assert new File("$outputDir/sv_calling/sniffles/multi_sample/test/test_sniffles.vcf.gz").exists() }, + { assert new File("$outputDir/sv_calling/sniffles/multi_sample/test/test_sniffles.vcf.gz.tbi").exists() }, + { assert new File("$outputDir/repeat_calling/trgt/multi_sample/test/test.vcf.gz").exists() }, + { assert new File("$outputDir/repeat_calling/trgt/multi_sample/test/test.vcf.gz.tbi").exists() }, + // Assert exists HG002_Revio_A + { assert new File("$outputDir/aligned_reads/HG002_Revio_A/HG002_Revio_A_phased.bam.bai").exists() }, + { assert new File("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_A/HG002_Revio_A.hap1.bam.bai").exists() }, + { assert new File("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_A/HG002_Revio_A.hap1.sam.gz").exists() }, + { assert new File("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_A/HG002_Revio_A.hap2.bam.bai").exists() }, + { assert new File("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_A/HG002_Revio_A.hap2.sam.gz").exists() }, + { assert new File("$outputDir/cnv_calling/hificnv/HG002_Revio_A/HG002_Revio_A.log").exists() }, + { assert new File("$outputDir/methylation/modkit/pileup/phased/HG002_Revio_A/HG002_Revio_A_modkit_pileup_phased_1.bed.gz").exists() }, + { assert new File("$outputDir/methylation/modkit/pileup/phased/HG002_Revio_A/HG002_Revio_A_modkit_pileup_phased_1.bed.gz.tbi").exists() }, + { assert new File("$outputDir/methylation/modkit/pileup/phased/HG002_Revio_A/HG002_Revio_A_modkit_pileup_phased_2.bed.gz").exists() }, + { assert new File("$outputDir/methylation/modkit/pileup/phased/HG002_Revio_A/HG002_Revio_A_modkit_pileup_phased_2.bed.gz.tbi").exists() }, + { assert new File("$outputDir/methylation/modkit/pileup/phased/HG002_Revio_A/HG002_Revio_A_modkit_pileup_phased_ungrouped.bed.gz").exists() }, + { assert new File("$outputDir/methylation/modkit/pileup/phased/HG002_Revio_A/HG002_Revio_A_modkit_pileup_phased_ungrouped.bed.gz.tbi").exists() }, + { assert new File("$outputDir/paraphase/HG002_Revio_A/HG002_Revio_A.paraphase.bam.bai").exists() }, + { assert new File("$outputDir/paraphase/HG002_Revio_A/HG002_Revio_A_paraphase_vcfs/HG002_Revio_A_hba.vcf.gz").exists() }, + { assert new File("$outputDir/paraphase/HG002_Revio_A/HG002_Revio_A_paraphase_vcfs/HG002_Revio_A_hba.vcf.gz.tbi").exists() }, + { assert new File("$outputDir/paraphase/HG002_Revio_A/HG002_Revio_A.paraphase.json").exists() }, + { assert new File("$outputDir/qc_aligned_reads/cramino/phased/HG002_Revio_A/HG002_Revio_A_cramino_aligned_phased.txt").exists() }, + { assert new File("$outputDir/qc_aligned_reads/cramino/unphased/HG002_Revio_A/HG002_Revio_A_cramino_aligned.txt").exists() }, + { assert new File("$outputDir/aligned_reads/HG002_Revio_A/HG002_Revio_A_phased.bam.bai").exists() }, + { assert new File("$outputDir/phasing/whatshap/phase/HG002_Revio_A/HG002_Revio_A_phased.vcf.gz").exists() }, + { assert new File("$outputDir/phasing/whatshap/phase/HG002_Revio_A/HG002_Revio_A_phased.vcf.gz.tbi").exists() }, + { assert new File("$outputDir/phasing/whatshap/stats/HG002_Revio_A/HG002_Revio_A_stats.stats.tsv").exists() }, + { assert new File("$outputDir/phasing/whatshap/stats/HG002_Revio_A/HG002_Revio_A_stats.blocks.tsv").exists() }, + { assert new File("$outputDir/qc_raw_reads/fastqc/HG002_Revio_A/HG002_Revio_A_fastqc.html").exists() }, + { assert new File("$outputDir/qc_raw_reads/fastqc/HG002_Revio_A/HG002_Revio_A_fastqc.zip").exists() }, + { assert new File("$outputDir/repeat_annotation/stranger/HG002_Revio_A/HG002_Revio_A_repeat_expansion_stranger.vcf.gz").exists() }, + { assert new File("$outputDir/repeat_annotation/stranger/HG002_Revio_A/HG002_Revio_A_repeat_expansion_stranger.vcf.gz.tbi").exists() }, + { assert new File("$outputDir/repeat_calling/trgt/single_sample/HG002_Revio_A/HG002_Revio_A_spanning_sorted.bam.bai").exists() }, + { assert new File("$outputDir/snvs/single_sample/HG002_Revio_A/HG002_Revio_A_snv_annotated_ranked.vcf.gz").exists() }, + { assert new File("$outputDir/snvs/single_sample/HG002_Revio_A/HG002_Revio_A_snv_annotated_ranked.vcf.gz.tbi").exists() }, + { assert new File("$outputDir/sv_calling/sniffles/single_sample/HG002_Revio_A/HG002_Revio_A_sniffles.snf").exists() }, + { assert new File("$outputDir/sv_calling/sniffles/single_sample/HG002_Revio_A/HG002_Revio_A_sniffles.vcf.gz").exists() }, + { assert new File("$outputDir/sv_calling/sniffles/single_sample/HG002_Revio_A/HG002_Revio_A_sniffles.vcf.gz.tbi").exists() }, + // Assert exists HG002_Revio_B + { assert new File("$outputDir/aligned_reads/HG002_Revio_B/HG002_Revio_B_phased.bam.bai").exists() }, + { assert new File("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_B/HG002_Revio_B.hap1.bam.bai").exists() }, + { assert new File("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_B/HG002_Revio_B.hap1.sam.gz").exists() }, + { assert new File("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_B/HG002_Revio_B.hap2.bam.bai").exists() }, + { assert new File("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_B/HG002_Revio_B.hap2.sam.gz").exists() }, + { assert new File("$outputDir/cnv_calling/hificnv/HG002_Revio_B/HG002_Revio_B.log").exists() }, + { assert new File("$outputDir/methylation/modkit/pileup/phased/HG002_Revio_B/HG002_Revio_B_modkit_pileup_phased_1.bed.gz").exists() }, + { assert new File("$outputDir/methylation/modkit/pileup/phased/HG002_Revio_B/HG002_Revio_B_modkit_pileup_phased_1.bed.gz.tbi").exists() }, + { assert new File("$outputDir/methylation/modkit/pileup/phased/HG002_Revio_B/HG002_Revio_B_modkit_pileup_phased_2.bed.gz").exists() }, + { assert new File("$outputDir/methylation/modkit/pileup/phased/HG002_Revio_B/HG002_Revio_B_modkit_pileup_phased_2.bed.gz.tbi").exists() }, + { assert new File("$outputDir/methylation/modkit/pileup/phased/HG002_Revio_B/HG002_Revio_B_modkit_pileup_phased_ungrouped.bed.gz").exists() }, + { assert new File("$outputDir/methylation/modkit/pileup/phased/HG002_Revio_B/HG002_Revio_B_modkit_pileup_phased_ungrouped.bed.gz.tbi").exists() }, + { assert new File("$outputDir/paraphase/HG002_Revio_B/HG002_Revio_B.paraphase.bam.bai").exists() }, + { assert new File("$outputDir/paraphase/HG002_Revio_B/HG002_Revio_B_paraphase_vcfs/HG002_Revio_B_hba.vcf.gz").exists() }, + { assert new File("$outputDir/paraphase/HG002_Revio_B/HG002_Revio_B_paraphase_vcfs/HG002_Revio_B_hba.vcf.gz.tbi").exists() }, + { assert new File("$outputDir/paraphase/HG002_Revio_B/HG002_Revio_B.paraphase.json").exists() }, + { assert new File("$outputDir/qc_aligned_reads/cramino/phased/HG002_Revio_B/HG002_Revio_B_cramino_aligned_phased.txt").exists() }, + { assert new File("$outputDir/qc_aligned_reads/cramino/unphased/HG002_Revio_B/HG002_Revio_B_cramino_aligned.txt").exists() }, + { assert new File("$outputDir/aligned_reads/HG002_Revio_B/HG002_Revio_B_phased.bam.bai").exists() }, + { assert new File("$outputDir/phasing/whatshap/phase/HG002_Revio_B/HG002_Revio_B_phased.vcf.gz").exists() }, + { assert new File("$outputDir/phasing/whatshap/phase/HG002_Revio_B/HG002_Revio_B_phased.vcf.gz.tbi").exists() }, + { assert new File("$outputDir/phasing/whatshap/stats/HG002_Revio_B/HG002_Revio_B_stats.stats.tsv").exists() }, + { assert new File("$outputDir/phasing/whatshap/stats/HG002_Revio_B/HG002_Revio_B_stats.blocks.tsv").exists() }, + { assert new File("$outputDir/qc_raw_reads/fastqc/HG002_Revio_B/HG002_Revio_B_fastqc.html").exists() }, + { assert new File("$outputDir/qc_raw_reads/fastqc/HG002_Revio_B/HG002_Revio_B_fastqc.zip").exists() }, + { assert new File("$outputDir/repeat_annotation/stranger/HG002_Revio_B/HG002_Revio_B_repeat_expansion_stranger.vcf.gz").exists() }, + { assert new File("$outputDir/repeat_annotation/stranger/HG002_Revio_B/HG002_Revio_B_repeat_expansion_stranger.vcf.gz.tbi").exists() }, + { assert new File("$outputDir/repeat_calling/trgt/single_sample/HG002_Revio_B/HG002_Revio_B_spanning_sorted.bam.bai").exists() }, + { assert new File("$outputDir/snvs/single_sample/HG002_Revio_B/HG002_Revio_B_snv_annotated_ranked.vcf.gz").exists() }, + { assert new File("$outputDir/snvs/single_sample/HG002_Revio_B/HG002_Revio_B_snv_annotated_ranked.vcf.gz.tbi").exists() }, + { assert new File("$outputDir/sv_calling/sniffles/single_sample/HG002_Revio_B/HG002_Revio_B_sniffles.snf").exists() }, + { assert new File("$outputDir/sv_calling/sniffles/single_sample/HG002_Revio_B/HG002_Revio_B_sniffles.vcf.gz").exists() }, + { assert new File("$outputDir/sv_calling/sniffles/single_sample/HG002_Revio_B/HG002_Revio_B_sniffles.vcf.gz.tbi").exists() } + ) + } + } +}