Skip to content

Commit

Permalink
metrics all is added. readme updated
Browse files Browse the repository at this point in the history
  • Loading branch information
janursa committed Jan 11, 2025
1 parent 50c65b3 commit b3e2d67
Show file tree
Hide file tree
Showing 20 changed files with 522 additions and 306 deletions.
278 changes: 134 additions & 144 deletions runs.ipynb

Large diffs are not rendered by default.

15 changes: 15 additions & 0 deletions scripts/calculate_score.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# bash src/metrics/all_metrics/run.sh resources/grn_models/norman/grnboost2.csv norman

prediction=${1}
dataset_id=${2}

viash run src/metrics/all_metrics/config.novsh.yaml -- \
--prediction ${prediction} \
--dataset_id ${dataset_id} \
--score output/score.h5ad \
--tf_all resources/prior/tf_all.csv \
--regulators_consensus resources/prior/regulators_consensus_${dataset_id}.json \
--ws_consensus resources/prior/ws_consensus_${dataset_id}.csv \
--ws_distance_background resources/prior/ws_distance_background_${dataset_id}.csv \
--evaluation_data_sc resources/evaluation_datasets/${dataset_id}_sc_counts.h5ad \
--evaluation_data resources/evaluation_datasets/${dataset_id}_perturbation.h5ad
26 changes: 8 additions & 18 deletions scripts/download_resources.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,33 +5,23 @@ set -e
echo ">> Downloading resources"

viash run src/common/sync_test_resources/config.vsh.yaml -- \
--input "s3://openproblems-data/resources/grn/grn-benchmark" \
--output "resources/grn-benchmark" \
--input "s3://openproblems-data/resources/grn/inference_datasets/" \
--output "resources/inference_datasets/" \
--delete

viash run src/common/sync_test_resources/config.vsh.yaml -- \
--input "s3://openproblems-data/resources/grn/prior" \
--output "resources/prior" \
--input "s3://openproblems-data/resources/grn/evaluation_datasets/" \
--output "resources/evaluation_datasets/" \
--delete

viash run src/common/sync_test_resources/config.vsh.yaml -- \
--input "s3://openproblems-data/resources/grn/grn_models" \
--output "resources/grn_models" \
--delete
echo ">> Downloading resources test"
viash run src/common/sync_test_resources/config.vsh.yaml -- \
--input "s3://openproblems-data/resources_test/grn/grn-benchmark" \
--output "resources_test/grn-benchmark" \
--delete

viash run src/common/sync_test_resources/config.vsh.yaml -- \
--input "s3://openproblems-data/resources_test/grn/prior" \
--output "resources_test/prior" \
--input "s3://openproblems-data/resources/grn/prior" \
--output "resources/prior" \
--delete

viash run src/common/sync_test_resources/config.vsh.yaml -- \
--input "s3://openproblems-data/resources_test/grn/grn_models" \
--output "resources_tests/grn_models" \
--input "s3://openproblems-data/resources/grn/grn_models/" \
--output "resources/grn_models/" \
--delete


13 changes: 13 additions & 0 deletions scripts/download_resources_all.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#!/bin/bash

set -e

echo ">> Downloading resources"

viash run src/common/sync_test_resources/config.vsh.yaml -- \
--input "s3://openproblems-data/resources/grn/" \
--output "resources/" \
--delete



2 changes: 1 addition & 1 deletion scripts/render_readme.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
set -e

viash run src/common/create_task_readme/config.vsh.yaml -- \
--task "grn_benchmark" \
--task "grn_inference" \
--task_dir "src" \
--github_url "https://github.com/openproblems-bio/task_grn_inference/tree/main/" \
--output "README.md"
9 changes: 0 additions & 9 deletions scripts/upload_resources.sh

This file was deleted.

41 changes: 2 additions & 39 deletions src/api/comp_metric.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,48 +8,19 @@ functionality:
description: |
A metric to evaluate the performance of the inferred GRN
arguments:
- name: --evaluation_data
__merge__: file_evaluation_h5ad.yaml
required: false
direction: input
- name: --prediction
__merge__: file_prediction.yaml
required: true
direction: input
- name: --score
__merge__: file_score.yaml
required: false
direction: output
- name: --tf_all
type: file
direction: input
required: true
example: resources_test/prior/tf_all.csv
- name: --reg_type
type: string
direction: input
default: ridge
description: name of regression to use
multiple: false
- name: --subsample
type: integer
direction: input
default: -1
description: number of samples randomly drawn from perturbation data
- name: --num_workers
type: integer
direction: input
default: 4
direction: output
- name: --method_id
type: string
direction: input
required: false
example: collectri
- name: --apply_tf
type: boolean
required: false
default: true

example: grnboost2
- name: --layer
type: string
direction: input
Expand All @@ -62,14 +33,6 @@ functionality:
type: integer
default: 2
direction: input
- name: --skeleton
type: file
direction: input
example: resources_test/prior/skeleton.csv
- name: --apply_skeleton
type: boolean
direction: input
default: false
- name: --dataset_id
type: string
direction: input
Expand Down
42 changes: 42 additions & 0 deletions src/api/comp_metric_regression.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
__merge__: comp_metric.yaml
functionality:
name: metrics_regression
namespace: "metrics"
info:
label: metrics_regression
summary: Calculates regression scores
arguments:
- name: --evaluation_data
__merge__: file_evaluation_h5ad.yaml
required: false
direction: input
- name: --tf_all
type: file
direction: input
required: true
example: resources_test/prior/tf_all.csv
- name: --reg_type
type: string
direction: input
default: ridge
description: name of regression to use
multiple: false
- name: --subsample
type: integer
direction: input
default: -1
description: number of samples randomly drawn from perturbation data
- name: --num_workers
type: integer
direction: input
default: 4
- name: --apply_tf
type: boolean
required: false
default: true
- name: --apply_skeleton
type: boolean
required: false
default: false


26 changes: 26 additions & 0 deletions src/api/comp_metric_ws.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
__merge__: comp_metric.yaml
functionality:
name: ws_distance
namespace: "metrics"
info:
label: ws_distance
summary: Calculates Wasserstein distance for a given GRN and dataset
arguments:
- name: --ws_consensus
type: file
direction: input
must_exist: false
required: true
example: resources_test/prior/ws_consensus_norman.csv
- name: --ws_distance_background
type: file
direction: input
must_exist: false
required: true
example: resources_test/prior/ws_distance_background_norman.csv
- name: --evaluation_data_sc
type: file
required: true
direction: input
example: 'resources_test/datasets_raw/adamson_sc_counts.h5ad'

39 changes: 21 additions & 18 deletions src/api/task_info.yaml
Original file line number Diff line number Diff line change
@@ -1,13 +1,20 @@
name: GRN inference benchmark
label: A dynamic benchmark for gene regulatory network (GRN) inference
label: Living benchmark for gene regulatory network (GRN) inference
motivation: |
GRNs are essential for understanding cellular identity and behavior. They are simplified models of gene expression regulated by complex processes involving multiple layers of control, from transcription to post-transcriptional modifications, incorporating various regulatory elements and non-coding RNAs. Gene transcription is controlled by a regulatory complex that includes transcription factors (TFs), cis-regulatory elements (CREs) like promoters and enhancers, and essential co-factors. High-throughput datasets, covering thousands of genes, facilitate the use of machine learning approaches to decipher GRNs. The advent of single-cell sequencing technologies, such as scRNA-seq, has made it possible to infer GRNs from a single experiment due to the abundance of samples. This allows researchers to infer condition-specific GRNs, such as for different cell types or diseases, and study potential regulatory factors associated with these conditions. Combining chromatin accessibility data with gene expression measurements has led to the development of enhancer-driven GRN (eGRN) inference pipelines, which offer significantly improved accuracy over single-modality methods.
description: |
Here, we present a dynamic benchmark platform for GRN inference. This platform provides curated datasets for GRN inference and evaluation, standardized evaluation protocols and metrics, computational infrastructure, and a dynamically updated leaderboard to track state-of-the-art methods. It runs novel GRNs in the cloud, offers competition scores, and stores them for future comparisons, reflecting new developments over time.
Here, we present geneRNIB as a living benchmark platform for GRN inference. This platform provides curated datasets for GRN inference and evaluation, standardized evaluation protocols and metrics, computational infrastructure, and a dynamically updated leaderboard to track state-of-the-art methods. It runs novel GRNs in the cloud, offers competition scores, and stores them for future comparisons, reflecting new developments over time.
The platform supports the integration of new datasets and protocols. When a new feature is added, previously evaluated GRNs are re-assessed, and the leaderboard is updated accordingly. The aim is to evaluate both the accuracy and completeness of inferred GRNs. It is designed for both single-modality and multi-omics GRN inference. Ultimately, it is a community-driven platform. So far, six eGRN inference methods have been integrated: Scenic+, CellOracle, FigR, scGLUE, GRaNIE, and ANANSE.
The platform supports the integration of new datasets and protocols. When a new feature is added, previously evaluated GRNs are re-assessed, and the leaderboard is updated accordingly. The aim is to evaluate both the accuracy and completeness of inferred GRNs. It is designed for both single-modality and multi-omics GRN inference. Ultimately, it is a community-driven platform.
So far, ten GRN inference methods have been integrated: five single-omics methods of GRNBoost2, GENIE3, Portia, PPCOR, and Scenic; and five eGRN inference methods of Scenic+, CellOracle, FigR, scGLUE, and GRaNIE.
Due to its flexible nature, the platform can incorporate various benchmark datasets and evaluation methods, using either prior knowledge or feature-based approaches.
In the current version, due to the absence of standardized prior knowledge, we use indirect approaches to benchmark GRNs. Employing interventional data as evaluation datasets, we have developed 8 metrics using a feature-based approach and Wasserstein distance, accounting for both accuracy and comprehensiveness.
Five datasets have been integrated so far, namely OPSCA, Nakatake, Norman, Adamson, and Replogle. For each dataset, standardized inference datasets are provided to be used for GRN inference and evaluation datasets are employed to benchmark.
See our publication for the details of methods.
Due to its flexible nature, the platform can incorporate various benchmark datasets and evaluation methods, using either prior knowledge or feature-based approaches. In the current version, due to the absence of standardized prior knowledge, we use a feature-based approach to benchmark GRNs. Our evaluation utilizes standardized datasets for GRN inference and evaluation, employing multiple regression analysis approaches to assess both accuracy and comprehensiveness.
summary: |
Benchmarking GRN inference methods
Expand All @@ -28,21 +35,16 @@ readme: |
# download resources
scripts/download_resources.sh
```
The datasets for GRN inference are located in `resources/inference_datasets`.
## Infer a GRN
```bash
viash run src/methods/dummy/config.vsh.yaml -- --multiomics_rna resources/grn-benchmark/multiomics_rna.h5ad --multiomics_atac resources/grn-benchmark/multiomics_atac.h5ad --prediction output/dummy.csv
```
Similarly, run the command for other methods.
One GRN should be inferred for each inference dataset (op, norman, replogle2, adamson, and nakatake). The inferred GRN should have three columns of `source, target, weight`. See `resources/grn_models/op/grnboost2.csv` as an example.
## Evaluate a GRN
Once a GRN is inferred (e.g. located in `output/your_GRN.csv`) for a given dataset (e.g. `norman`), use the following code to obtain evaluation scores.
```bash
scripts/benchmark_grn.sh --grn resources/grn-benchmark/models/collectri.csv
scripts/calculate_score.sh output/your_GRN.csv norman
```
Similarly, run the command for other GRN models.
This will calculate and print the scores as well as output the scores into `output/score.h5ad`
## Add a method
Expand All @@ -64,14 +66,15 @@ authors:
roles: [ contributor ]
info:
github: AntoinePassemiers
- name: Christian Arnold
roles: [ contributor ]
info:
github: chrarnold
- name: Marco Stock
roles: [ contributor ]
info:
github: stkmrc
- name: Christian Arnold
roles: [ contributor ]
info:
github: chrarnold




97 changes: 97 additions & 0 deletions src/metrics/all_metrics/config.novsh.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@

__merge__: ../../api/comp_metric.yaml

functionality:
name: metrics_all
info:
label: metrics_all
summary: Calculates all metrics for a given GRN and dataset
arguments:
- name: --evaluation_data
type: file
required: true
direction: input
- name: --tf_all
type: file
direction: input
required: true
example: resources_test/prior/tf_all.csv
- name: --reg_type
type: string
direction: input
default: ridge
description: name of regression to use
multiple: false
- name: --subsample
type: integer
direction: input
default: -1
description: number of samples randomly drawn from perturbation data
- name: --num_workers
type: integer
direction: input
default: 4
- name: --apply_tf
type: boolean
required: false
default: true
- name: --apply_skeleton
type: boolean
required: false
default: false
- name: --regulators_consensus
type: file
direction: input
must_exist: false
required: true
example: resources_test/prior/regulators_consensus_norman.json
- name: --static_only
direction: input
type: boolean
default: true
- name: --binarize
type: boolean
direction: input
description: whether to binarize the weight
default: true
- name: --ws_consensus
type: file
direction: input
must_exist: false
required: true
example: resources_test/prior/ws_consensus_norman.csv
- name: --ws_distance_background
type: file
direction: input
must_exist: false
required: true
example: resources_test/prior/ws_distance_background_norman.csv
- name: --evaluation_data_sc
type: file
required: true
direction: input
example: 'resources_test/datasets_raw/adamson_sc_counts.h5ad'


resources:
- type: python_script
path: script.py
- path: /src/utils/util.py
dest: util.py
- path: /src/metrics/regression_1/main.py
dest: reg1_main.py
- path: /src/metrics/regression_2/main.py
dest: reg2_main.py
- path: /src/metrics/wasserstein/main.py
dest: ws_main.py


platforms:
- type: docker
image: ghcr.io/openproblems-bio/base_python:1.0.4
setup:
- type: python
packages: [ lightgbm==4.3.0, numpy==1.26.4 ]
- type: nextflow
directives:
label: [ midtime, midmem, midcpu ]
Loading

0 comments on commit b3e2d67

Please sign in to comment.