nf-core · suzannejin · Jan 27, 2025 · Dec 11, 2024 · Dec 13, 2024 · Dec 13, 2024
diff --git a/modules/nf-core/gprofiler2/gost/main.nf b/modules/nf-core/gprofiler2/gost/main.nf
@@ -9,8 +9,8 @@ process GPROFILER2_GOST {
 
     input:
     tuple val(meta), path(de_file)
-    path(gmt_file)
-    path(background_file)
+    tuple val(meta2), path(gmt_file)
+    tuple val(meta3), path(background_file)
 
     output:
     tuple val(meta), path("*.gprofiler2.all_enriched_pathways.tsv")     , emit: all_enrich

diff --git a/modules/nf-core/gprofiler2/gost/meta.yml b/modules/nf-core/gprofiler2/gost/meta.yml
@@ -27,12 +27,18 @@ input:
         pattern: "*.{csv,tsv}"
         description: |
           CSV or TSV-format tabular file with differential analysis outputs
-  - - gmt_file:
+  - - meta2:
+        type: map
+        description: Groovy Map containing metadata for the GMT file, e.g. `[ id:'test' ]`
+    - gmt_file:
         type: file
         pattern: "*.gmt"
         description: |
           Path to a GMT file downloaded from g:profiler that should be queried instead of the online databases
-  - - background_file:
+  - - meta3:
+        type: map
+        description: Groovy Map containing metadata for the background file, e.g. `[ id:'test' ]`
+    - background_file:
         type: file
         pattern: "*.{csv,tsv,txt}"
         description: |

diff --git a/modules/nf-core/gprofiler2/gost/tests/main.nf.test b/modules/nf-core/gprofiler2/gost/tests/main.nf.test
@@ -55,8 +55,14 @@ nextflow_process {
                     ['id':'Condition_genotype_WT_KO', 'variable':'Condition genotype', 'reference':'WT', 'target':'KO', 'blocking':'batch'],
                     file(params.modules_testdata_base_path + "genomics/mus_musculus/rnaseq_expression/Condition_genotype_WT_KO.deseq2.results_filtered.tsv", checkIfExists: true)
                 ]
-                input[1] = file(params.modules_testdata_base_path + "genomics/mus_musculus/gene_set_analysis/mh.all.v2022.1.Mm.symbols.gmt", checkIfExists: true)
-                input[2] = file(params.modules_testdata_base_path + "genomics/mus_musculus/rnaseq_expression/study.filtered.tsv", checkIfExists: true)
+                input[1] = [
+                    ['id': 'test'],
+                    file(params.modules_testdata_base_path + "genomics/mus_musculus/gene_set_analysis/mh.all.v2022.1.Mm.symbols.gmt", checkIfExists: true)
+                ]
+                input[2] = [
+                    ['id': 'test'],
+                    file(params.modules_testdata_base_path + "genomics/mus_musculus/rnaseq_expression/study.filtered.tsv", checkIfExists: true)
+                ]
                 """
             }
         }
@@ -66,9 +72,9 @@ nextflow_process {
                 { assert process.success },
                 { assert snapshot(
                     process.out.all_enrich,
-                    process.out.plot_png,
                     process.out.sub_enrich,
-                    process.out.sub_plot,
+                    file(process.out.plot_png[0][1]).name, //assert unstable file
+                    process.out.sub_plot[0][1].collect{ file(it).name }, //assert unstable file
                     process.out.filtered_gmt,
                     process.out.session_info.collect{ meta,session_info -> file(session_info).name }, //assert unstable file
                     process.out.versions,
@@ -94,8 +100,14 @@ nextflow_process {
                     ['id':'Condition_genotype_WT_KO', 'variable':'Condition genotype', 'reference':'WT', 'target':'KO', 'blocking':'batch'],
                     file(params.modules_testdata_base_path + "genomics/mus_musculus/rnaseq_expression/Condition_genotype_WT_KO.deseq2.results_filtered.tsv", checkIfExists: true)
                 ]
-                input[1] = file(params.modules_testdata_base_path + "genomics/mus_musculus/gene_set_analysis/mh.all.v2022.1.Mm.symbols.gmt", checkIfExists: true)
-                input[2] = file(params.modules_testdata_base_path + "genomics/mus_musculus/rnaseq_expression/study.filtered.tsv", checkIfExists: true)
+                input[1] = [
+                    ['id': 'test'],
+                    file(params.modules_testdata_base_path + "genomics/mus_musculus/gene_set_analysis/mh.all.v2022.1.Mm.symbols.gmt", checkIfExists: true)
+                ]
+                input[2] = [
+                    ['id': 'test'],
+                    file(params.modules_testdata_base_path + "genomics/mus_musculus/rnaseq_expression/study.filtered.tsv", checkIfExists: true)
+                ]
                 """
             }
         }

diff --git a/modules/nf-core/gprofiler2/gost/tests/main.nf.test.snap b/modules/nf-core/gprofiler2/gost/tests/main.nf.test.snap
@@ -13,18 +13,6 @@
                     "Condition_genotype_WT_KO.gprofiler2.all_enriched_pathways.tsv:md5,1134a02ca061c463bcbff277eefbfb19"
                 ]
             ],
-            [
-                [
-                    {
-                        "id": "Condition_genotype_WT_KO",
-                        "variable": "Condition genotype",
-                        "reference": "WT",
-                        "target": "KO",
-                        "blocking": "batch"
-                    },
-                    "Condition_genotype_WT_KO.gprofiler2.gostplot.png:md5,4b83d1bcf7dc9dbf6cef3d545e440c5b"
-                ]
-            ],
             [
                 [
                     {
@@ -47,27 +35,17 @@
                     ]
                 ]
             ],
+            "Condition_genotype_WT_KO.gprofiler2.gostplot.png",
             [
-                [
-                    {
-                        "id": "Condition_genotype_WT_KO",
-                        "variable": "Condition genotype",
-                        "reference": "WT",
-                        "target": "KO",
-                        "blocking": "batch"
-                    },
-                    [
-                        "Condition_genotype_WT_KO.gprofiler2.GO:BP.sub_enriched_pathways.png:md5,d89498267e985adf0ad1266e2deb9f48",
-                        "Condition_genotype_WT_KO.gprofiler2.GO:CC.sub_enriched_pathways.png:md5,e04cdd51b200671613254d021d3af242",
-                        "Condition_genotype_WT_KO.gprofiler2.GO:MF.sub_enriched_pathways.png:md5,33ea0652d78111978677acde0fe7f807",
-                        "Condition_genotype_WT_KO.gprofiler2.HP.sub_enriched_pathways.png:md5,6c040ac4baba73ae5637b00650e6aea1",
-                        "Condition_genotype_WT_KO.gprofiler2.KEGG.sub_enriched_pathways.png:md5,fbd232c4eeced95ceda60b43a02dbe1f",
-                        "Condition_genotype_WT_KO.gprofiler2.MIRNA.sub_enriched_pathways.png:md5,956880d3bf4852a06b0ffaaaba565732",
-                        "Condition_genotype_WT_KO.gprofiler2.REAC.sub_enriched_pathways.png:md5,0e8f9217d275668986771dc7fede3170",
-                        "Condition_genotype_WT_KO.gprofiler2.TF.sub_enriched_pathways.png:md5,0697164bc87e95e6508db966df94e01e",
-                        "Condition_genotype_WT_KO.gprofiler2.WP.sub_enriched_pathways.png:md5,09976762c7541ff9e5009e8763986845"
-                    ]
-                ]
+                "Condition_genotype_WT_KO.gprofiler2.GO:BP.sub_enriched_pathways.png",
+                "Condition_genotype_WT_KO.gprofiler2.GO:CC.sub_enriched_pathways.png",
+                "Condition_genotype_WT_KO.gprofiler2.GO:MF.sub_enriched_pathways.png",
+                "Condition_genotype_WT_KO.gprofiler2.HP.sub_enriched_pathways.png",
+                "Condition_genotype_WT_KO.gprofiler2.KEGG.sub_enriched_pathways.png",
+                "Condition_genotype_WT_KO.gprofiler2.MIRNA.sub_enriched_pathways.png",
+                "Condition_genotype_WT_KO.gprofiler2.REAC.sub_enriched_pathways.png",
+                "Condition_genotype_WT_KO.gprofiler2.TF.sub_enriched_pathways.png",
+                "Condition_genotype_WT_KO.gprofiler2.WP.sub_enriched_pathways.png"
             ],
             [
 
@@ -89,7 +67,7 @@
             "nf-test": "0.9.2",
             "nextflow": "24.10.3"
         },
-        "timestamp": "2025-01-09T13:43:18.555455129"
+        "timestamp": "2025-01-21T11:29:54.746689985"
     },
     "stub": {
         "content": [
@@ -298,6 +276,6 @@
             "nf-test": "0.9.2",
             "nextflow": "24.10.3"
         },
-        "timestamp": "2025-01-09T13:43:36.462475057"
+        "timestamp": "2025-01-21T11:31:33.394855046"
     }
 }
diff --git a/modules/nf-core/gsea/gsea/main.nf b/modules/nf-core/gsea/gsea/main.nf
@@ -10,7 +10,7 @@ process GSEA_GSEA {
     input:
     tuple val(meta), path(gct), path(cls), path(gene_sets)
     tuple val(reference), val(target)
-    path(chip) // Optional identifier mapping file
+    tuple val(meta2), path(chip) // Optional identifier mapping file
 
     output:
     tuple val(meta), path("*.rpt")                             , emit: rpt

diff --git a/modules/nf-core/gsea/gsea/meta.yml b/modules/nf-core/gsea/gsea/meta.yml
@@ -40,7 +40,10 @@ input:
         description: |
           String indicating which of the classes in the cls file should be used
           as the target level of the comparison.
-  - - chip:
+  - - meta2:
+        type: map
+        description: Groovy Map containing metadata for the chip file, e.g. `[ id:'test' ]`
+    - chip:
         type: file
         description: |
           optional Broad-style chip file mapping identifiers in gct to

diff --git a/modules/nf-core/gsea/gsea/tests/main.nf.test b/modules/nf-core/gsea/gsea/tests/main.nf.test
@@ -17,7 +17,10 @@ nextflow_process {
                 """
                 input[0] = [['id':'Condition_genotype_WT_KO', 'variable':'Condition genotype', 'reference':'WT', 'target':'KO', 'blocking':'batch'], file("https://github.com/nf-core/test-datasets/raw/refs/heads/modules/data/genomics/mus_musculus/gene_set_analysis/Condition_treatment_Control_Treated.gct", checkIfExists:true), file("https://github.com/nf-core/test-datasets/raw/refs/heads/modules/data/genomics/mus_musculus/gene_set_analysis/Condition_genotype_WT_KO.cls", checkIfExists:true), file("https://github.com/nf-core/test-datasets/raw/refs/heads/modules/data/genomics/mus_musculus/gene_set_analysis/mh.all.v2022.1.Mm.symbols.gmt", checkIfExists:true)]
                 input[1] = ['WT', 'KO']
-                input[2] = file("https://github.com/nf-core/test-datasets/raw/refs/heads/modules/data/genomics/mus_musculus/gene_set_analysis/Mus_musculus.anno.feature_metadata.chip", checkIfExists:true)
+                input[2] = [
+                    ['id': 'test'],
+                    file("https://github.com/nf-core/test-datasets/raw/refs/heads/modules/data/genomics/mus_musculus/gene_set_analysis/Mus_musculus.anno.feature_metadata.chip", checkIfExists:true)
+                ]
                 """
             }
         }
@@ -63,7 +66,10 @@ nextflow_process {
                 """
                 input[0] = [['id':'Condition_genotype_WT_KO', 'variable':'Condition genotype', 'reference':'WT', 'target':'KO', 'blocking':'batch'], file("https://github.com/nf-core/test-datasets/raw/refs/heads/modules/data/genomics/mus_musculus/gene_set_analysis/Condition_treatment_Control_Treated.gct", checkIfExists:true), file("https://github.com/nf-core/test-datasets/raw/refs/heads/modules/data/genomics/mus_musculus/gene_set_analysis/Condition_genotype_WT_KO.cls", checkIfExists:true), file("https://github.com/nf-core/test-datasets/raw/refs/heads/modules/data/genomics/mus_musculus/gene_set_analysis/mh.all.v2022.1.Mm.symbols.gmt", checkIfExists:true)]
                 input[1] = ['WT', 'KO']
-                input[2] = file("https://github.com/nf-core/test-datasets/raw/refs/heads/modules/data/genomics/mus_musculus/gene_set_analysis/Mus_musculus.anno.feature_metadata.chip", checkIfExists:true)
+                input[2] = [
+                    ['id': 'test'],
+                    file("https://github.com/nf-core/test-datasets/raw/refs/heads/modules/data/genomics/mus_musculus/gene_set_analysis/Mus_musculus.anno.feature_metadata.chip", checkIfExists:true)
+                ]
                 """
             }
         }

diff --git a/subworkflows/nf-core/differential_functional_enrichment/main.nf b/subworkflows/nf-core/differential_functional_enrichment/main.nf
@@ -0,0 +1,150 @@
+
+//
+// Perform enrichment analysis
+//
+include { GPROFILER2_GOST          } from "../../../modules/nf-core/gprofiler2/gost/main.nf"
+include { CUSTOM_TABULARTOGSEAGCT  } from '../../../modules/nf-core/custom/tabulartogseagct/main.nf'
+include { CUSTOM_TABULARTOGSEACLS  } from '../../../modules/nf-core/custom/tabulartogseacls/main.nf'
+include { CUSTOM_TABULARTOGSEACHIP } from '../../../modules/nf-core/custom/tabulartogseachip/main.nf'
+include { GSEA_GSEA                } from '../../../modules/nf-core/gsea/gsea/main.nf'
+include { PROPR_GREA               } from "../../../modules/nf-core/propr/grea/main.nf"
+
+// Combine two meta maps into a single map.
+//
+// Keys present in only one map are copied unchanged. When both maps hold the same
+// key with different values (e.g. 'id'), the merged value is "<meta value>_<meta2 value>".
+// A falsy value in `meta` (null, '', 0, false) is treated as absent, so the value
+// from `meta2` is kept as-is in that case.
+//
+// meta  : first map; its value comes first in a merged value
+// meta2 : second map; its value is kept when the values are equal or `meta` has none
+// returns a new map; neither input map is modified
+def mergeMaps(meta, meta2){
+    (meta + meta2).collectEntries { k, v ->
+        // For shared keys `v` is the meta2 value, because the right-hand map wins in `meta + meta2`.
+        // toString() turns the interpolated GString into a plain String: a GString is not
+        // equal to (and hashes differently from) a String with the same text, which would
+        // stop the merged meta from matching String-valued metas used as channel keys.
+        meta[k] && meta[k] != v ? [k, "${meta[k]}_${v}".toString()] : [k, v]
+    }
+}
+
+// Run functional enrichment analysis on each input with the method named in its
+// channel entry. Three methods are dispatched on: 'gprofiler2', 'gsea' and 'grea'.
+// GSEA additionally needs contrasts, a samplesheet and a features sheet, which are
+// converted to the GCT / CLS / CHIP inputs before GSEA_GSEA is called.
+workflow DIFFERENTIAL_FUNCTIONAL_ENRICHMENT {
+    take:
+    // Input data for functional analysis.
+    // It can be the results of a differential expression analysis or an abundance matrix.
+    // The functional analysis method to run must be provided explicitly.
+    ch_input                            // [ meta_input, input file, method to run ]
+
+    // gene sets and background
+    ch_gene_sets                        // [ meta_gmt, gmt file ]
+    ch_background                       // [ meta_background, background file ]
+
+    // other - for the moment these files are only needed for GSEA
+    ch_contrasts                        // [ meta_contrast, contrast_variable, reference, target ]
+    ch_samplesheet                      // [ meta_exp, samples sheet ]
+    ch_featuresheet                     // [ meta_exp, features sheet, features id, features symbol ]
+
+    main:
+
+    ch_versions = Channel.empty()
+
+    // Add method information into meta map of ch_input
+    // This information is used later to determine which method to run for each input
+
+    // combine() is called without a key, so every input is paired with every gene set
+    // and every background. The result is split into three branches that all carry the
+    // same meta_new (input meta + 'method'), which is what the filters below test.
+    ch_input = ch_input
+        .combine(ch_gene_sets)
+        .combine(ch_background)
+        .multiMap {
+            meta_input, file, analysis_method, meta_gmt, gmt, meta_background, background ->
+            def meta_new = meta_input + [ 'method': analysis_method ]
+            input:
+                [ meta_new, file ]
+            gene_sets:
+                [ meta_new, gmt ]  // NOTE here we assume that the modules will not make use of meta_gmt and meta_background
+            background:
+                [ meta_new, background ]
+        }
+
+    // In the case of GSEA, it needs additional files coming from other channels that other methods don't use
+    // here we define the input channel for the GSEA section
+
+    // The closure parameters follow the element order produced by the join/combine chain below:
+    // [ meta_input, input, gmt ] + [ meta_exp, samplesheet, featuresheet, features_id, features_symbol ]
+    // + [ meta_contrasts, variable, reference, target ]
+    def criteria = multiMapCriteria { meta_input, input, gmt, meta_exp, samplesheet, featuresheet, features_id, features_symbol, meta_contrasts, variable, reference, target ->
+        def meta_contrasts_new = meta_contrasts + [ 'variable': variable, 'reference': reference, 'target': target ]  // make sure variable, reference, target are in the meta
+        // NOTE(review): mergeMaps concatenates differing values of shared keys, so if meta_input
+        // also holds 'reference'/'target' with other values, the merged values would become
+        // "<contrast value>_<input value>" - confirm the input metas never carry these keys.
+        def meta_all = mergeMaps(meta_contrasts_new, meta_input)
+        input:
+            [ meta_all, input ]
+        gene_sets:
+            [ meta_all, gmt ]
+        contrasts_and_samples:
+            [ meta_all, samplesheet ]
+        features:
+            [ meta_exp, featuresheet ]
+        features_cols:
+            [ features_id, features_symbol ]
+    }
+    // Keep only the inputs whose method is 'gsea', then pair each of them with the
+    // samplesheet + features sheet (joined on meta_exp) and with every contrast.
+    ch_preinput_for_gsea = ch_input.input
+        .join(ch_input.gene_sets)
+        .filter{ it[0].method == 'gsea' }
+        .combine(ch_samplesheet.join(ch_featuresheet))
+        .combine(ch_contrasts)
+        .multiMap(criteria)
+
+    // ----------------------------------------------------
+    // Perform enrichment analysis with gprofiler2
+    // ----------------------------------------------------
+
+    // NOTE(review): the three channels are passed separately and share the same meta;
+    // this presumably relies on them emitting in the same order after filtering - confirm.
+    GPROFILER2_GOST(
+        ch_input.input.filter{ it[0].method == 'gprofiler2' },
+        ch_input.gene_sets.filter{ it[0].method == 'gprofiler2'},
+        ch_input.background.filter{ it[0].method == 'gprofiler2'}
+    )
+
+    // ----------------------------------------------------
+    // Perform enrichment analysis with GSEA
+    // ----------------------------------------------------
+
+    // NOTE that there can be more than one GCT input if they come from different tools (e.g. limma, deseq2).
+    // There can be as many CLS inputs as there are combinations of input x contrasts,
+    // whereas there can be only one features file.
+
+    CUSTOM_TABULARTOGSEAGCT(ch_preinput_for_gsea.input)
+
+    CUSTOM_TABULARTOGSEACLS(ch_preinput_for_gsea.contrasts_and_samples)
+
+    // Only the first features entry is used, in line with the single-features-file note above.
+    CUSTOM_TABULARTOGSEACHIP(
+        ch_preinput_for_gsea.features.first(),
+        ch_preinput_for_gsea.features_cols.first()
+    )
+
+    // Re-assemble [ meta_all, gct, cls, gmt ] by joining on the shared meta_all.
+    ch_input_for_gsea = CUSTOM_TABULARTOGSEAGCT.out.gct
+        .join(CUSTOM_TABULARTOGSEACLS.out.cls)
+        .join( ch_preinput_for_gsea.gene_sets )
+
+    // reference and target are read back from the meta, where the criteria closure stored them.
+    GSEA_GSEA(
+        ch_input_for_gsea,
+        ch_input_for_gsea.map{ tuple(it[0].reference, it[0].target) },
+        CUSTOM_TABULARTOGSEACHIP.out.chip.first()
+    )
+
+    // ----------------------------------------------------
+    // Perform enrichment analysis with GREA
+    // ----------------------------------------------------
+
+    PROPR_GREA(
+        ch_input.input.filter{ it[0].method == 'grea' },
+        ch_input.gene_sets.filter{ it[0].method == 'grea' }
+    )
+
+    emit:
+    // here we emit the outputs that will be useful afterwards in the
+    // nf-core/differentialabundance pipeline
+
+    // gprofiler2-specific outputs
+    gprofiler2_all_enrich = GPROFILER2_GOST.out.all_enrich
+    gprofiler2_sub_enrich = GPROFILER2_GOST.out.sub_enrich
+    gprofiler2_plot_html  = GPROFILER2_GOST.out.plot_html
+
+    // gsea-specific outputs
+    // reference and target reports joined into one entry per meta
+    gsea_report           = GSEA_GSEA.out.report_tsvs_ref
+                                .join(GSEA_GSEA.out.report_tsvs_target)
+
+    // grea-specific outputs
+    grea_results          = PROPR_GREA.out.results
+
+    // tool versions
+    versions              = ch_versions
+                                .mix(GPROFILER2_GOST.out.versions)
+                                .mix(CUSTOM_TABULARTOGSEAGCT.out.versions)
+                                .mix(CUSTOM_TABULARTOGSEACLS.out.versions)
+                                .mix(CUSTOM_TABULARTOGSEACHIP.out.versions)
+                                .mix(GSEA_GSEA.out.versions)
+                                .mix(PROPR_GREA.out.versions)
+}