openproblems-bio · lazappi · Jan 21, 2025 · Jan 21, 2025 · Jan 21, 2025 · Jan 21, 2025
diff --git a/common b/common
diff --git a/src/methods/geneformer/config.vsh.yaml b/src/methods/geneformer/config.vsh.yaml
@@ -21,20 +21,24 @@ info:
   method_types: [embedding]
   variants:
     geneformer_12L_95M_i4096:
-      model: "gf-12L-95M-i4096"
+      model: gf-12L-95M-i4096
     geneformer_6L_30M_i2048:
-      model: "gf-6L-30M-i2048"
+      model: gf-6L-30M-i2048
     geneformer_12L_30M_i2048:
-      model: "gf-12L-30M-i2048"
+      model: gf-12L-30M-i2048
     geneformer_20L_95M_i4096:
-      model: "gf-20L-95M-i4096"
+      model: gf-20L-95M-i4096
 
 arguments:
-  - name: "--model"
-    type: "string"
+  - name: --model
+    type: string
     description: String representing the Geneformer model to use
-    choices: ["gf-6L-30M-i2048", "gf-12L-30M-i2048", "gf-12L-95M-i4096", "gf-20L-95M-i4096"]
-    default: "gf-12L-95M-i4096"
+    choices:
+      - gf-6L-30M-i2048
+      - gf-12L-30M-i2048
+      - gf-12L-95M-i4096
+      - gf-20L-95M-i4096
+    default: gf-12L-95M-i4096
 
 resources:
   - type: python_script
@@ -48,9 +52,9 @@ engines:
     setup:
       - type: python
         pip:
-        - pyarrow<15.0.0a0,>=14.0.1
-        - huggingface_hub
-        - git+https://huggingface.co/ctheodoris/Geneformer.git
+          - pyarrow<15.0.0a0,>=14.0.1
+          - huggingface_hub
+          - git+https://huggingface.co/ctheodoris/Geneformer.git
 
 runners:
   - type: executable

diff --git a/src/methods/scgpt_finetuned/config.vsh.yaml b/src/methods/scgpt_finetuned/config.vsh.yaml
@@ -44,6 +44,7 @@ resources:
     path: script.py
   - path: /src/utils/read_anndata_partial.py
   - path: scgpt_functions.py
+  - path: /src/utils/exit_codes.py
 
 engines:
   - type: docker

diff --git a/src/methods/scgpt_finetuned/script.py b/src/methods/scgpt_finetuned/script.py
@@ -30,6 +30,7 @@
 
 sys.path.append(meta["resources_dir"])
 from read_anndata_partial import read_anndata
+from exit_codes import exit_non_applicable
 from scgpt_functions import evaluate, prepare_data, prepare_dataloader, train
 
 print(f"====== scGPT version {scgpt.__version__} ======", flush=True)
@@ -39,7 +40,7 @@
 adata = read_anndata(par["input"], X="layers/counts", obs="obs", var="var", uns="uns")
 
 if adata.uns["dataset_organism"] != "homo_sapiens":
-    raise ValueError(
+    exit_non_applicable(
         f"scGPT can only be used with human data "
         f"(dataset_organism == \"{adata.uns['dataset_organism']}\")"
     )

diff --git a/src/methods/scprint/config.vsh.yaml b/src/methods/scprint/config.vsh.yaml
@@ -1,4 +1,4 @@
-__merge__: /src/api/base_method.yaml
+__merge__: /src/api/comp_method.yaml
 
 name: scprint
 label: scPRINT
@@ -38,6 +38,11 @@ info:
       model_name: "medium"
     scprint_small:
       model_name: "small"
+  test_setup:
+    run:
+      model_name: small
+      batch_size: 16
+      max_len: 100
 
 arguments:
   - name: "--model_name"
@@ -49,6 +54,14 @@ arguments:
     type: file
     description: Path to the scPRINT model.
     required: false
+  - name: --batch_size
+    type: integer
+    description: The size of the batches to be used in the DataLoader.
+    default: 64
+  - name: --max_len
+    type: integer
+    description: The maximum length of the gene sequence.
+    default: 4000
 
 resources:
   - type: python_script
@@ -79,4 +92,4 @@ runners:
   - type: executable
   - type: nextflow
     directives:
-      label: [hightime, midmem, midcpu, gpu]
+      label: [hightime, midmem, midcpu, gpu, midsharedmem]
diff --git a/src/methods/scprint/script.py b/src/methods/scprint/script.py
@@ -1,12 +1,13 @@
-import anndata as ad
-from scdataloader import Preprocessor
+import os
 import sys
-from huggingface_hub import hf_hub_download
-from scprint.tasks import Embedder
-from scprint import scPrint
+
+import anndata as ad
 import scprint
 import torch
-import os
+from huggingface_hub import hf_hub_download
+from scdataloader import Preprocessor
+from scprint import scPrint
+from scprint.tasks import Embedder
 
 ## VIASH START
 par = {
@@ -19,8 +20,8 @@
 ## VIASH END
 
 sys.path.append(meta["resources_dir"])
-from read_anndata_partial import read_anndata
 from exit_codes import exit_non_applicable
+from read_anndata_partial import read_anndata
 
 print(f"====== scPRINT version {scprint.__version__} ======", flush=True)
 
@@ -41,7 +42,7 @@
 
 print("\n>>> Preprocessing data...", flush=True)
 preprocessor = Preprocessor(
-    min_valid_genes_id=min(0.9 * adata.n_vars, 10000), # 90% of features up to 10,000
+    min_valid_genes_id=min(0.9 * adata.n_vars, 10000),  # 90% of features up to 10,000
     # Turn off cell filtering to return results for all cells
     filter_cell_by_counts=False,
     min_nnz_genes=False,
@@ -77,7 +78,8 @@
 print(f"Using {n_cores_available} worker cores")
 embedder = Embedder(
     how="random expr",
-    max_len=4000,
+    batch_size=par["batch_size"],
+    max_len=par["max_len"],
     add_zero_genes=0,
     num_workers=n_cores_available,
     doclass=False,

diff --git a/src/metrics/asw_label/config.vsh.yaml b/src/metrics/asw_label/config.vsh.yaml
@@ -38,4 +38,4 @@ runners:
   - type: executable
   - type: nextflow
     directives:
-      label: [midtime, midmem, lowcpu]
+      label: [hightime, midmem, lowcpu]
diff --git a/src/metrics/isolated_label_asw/config.vsh.yaml b/src/metrics/isolated_label_asw/config.vsh.yaml
@@ -39,4 +39,4 @@ runners:
   - type: executable
   - type: nextflow
     directives:
-      label: [midtime, midmem, lowcpu]
+      label: [hightime, midmem, lowcpu]
diff --git a/src/metrics/kbet/config.vsh.yaml b/src/metrics/kbet/config.vsh.yaml
@@ -58,4 +58,4 @@ runners:
   - type: executable
   - type: nextflow
     directives:
-      label: [hightime, highmem, lowcpu]
+      label: [hightime, veryhighmem, lowcpu]
diff --git a/src/metrics/kbet/script.py b/src/metrics/kbet/script.py
@@ -30,7 +30,7 @@
     type_="embed",
     embed="X_emb",
     scaled=True,
-    verbose=False,
+    verbose=True,
 )
 print(score, flush=True)
+2 −2		component_tests/run_and_check_output.py
+10 −8		nextflow_helpers/labels_tw.config