diff --git a/params/celloracle_test.yaml b/params/celloracle_test.yaml deleted file mode 100644 index f59ebd3bc..000000000 --- a/params/celloracle_test.yaml +++ /dev/null @@ -1,8 +0,0 @@ -param_list: - - id: celloracle_test - multiomics_rna: s3://openproblems-data/resources_test/grn//grn-benchmark/multiomics_rna.h5ad - multiomics_atac: s3://openproblems-data/resources_test/grn//grn-benchmark/multiomics_atac.h5ad - num_workers: 20 - temp_dir: ./tmp/grn -output_state: "state.yaml" -publish_dir: "s3://openproblems-data/resources/grn/results/celloracle_test" diff --git a/params/subsample_200_ridge.yaml b/params/subsample_200_ridge.yaml index b0503302d..7d6ddfafb 100644 --- a/params/subsample_200_ridge.yaml +++ b/params/subsample_200_ridge.yaml @@ -6,7 +6,7 @@ param_list: method_id: celloracle subsample: 200 max_workers: 20 - consensus: s3://openproblems-data/resources/grn/prior/consensus.csv + consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json prediction: s3://openproblems-data/resources/grn/grn_models/celloracle.csv - id: lognorm_celloracle @@ -16,7 +16,7 @@ param_list: method_id: celloracle subsample: 200 max_workers: 20 - consensus: s3://openproblems-data/resources/grn/prior/consensus.csv + consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json prediction: s3://openproblems-data/resources/grn/grn_models/celloracle.csv - id: scgen_pearson_celloracle @@ -26,7 +26,7 @@ param_list: method_id: celloracle subsample: 200 max_workers: 20 - consensus: s3://openproblems-data/resources/grn/prior/consensus.csv + consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json prediction: s3://openproblems-data/resources/grn/grn_models/celloracle.csv - id: scgen_lognorm_celloracle @@ -36,7 +36,7 @@ param_list: method_id: celloracle subsample: 200 max_workers: 20 - consensus: s3://openproblems-data/resources/grn/prior/consensus.csv + consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json prediction: s3://openproblems-data/resources/grn/grn_models/celloracle.csv - id: seurat_pearson_celloracle @@ -46,7 +46,7 @@ param_list: method_id: celloracle subsample: 200 max_workers: 20 - consensus: s3://openproblems-data/resources/grn/prior/consensus.csv + consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json prediction: s3://openproblems-data/resources/grn/grn_models/celloracle.csv - id: seurat_lognorm_celloracle @@ -56,7 +56,7 @@ param_list: method_id: celloracle subsample: 200 max_workers: 20 - consensus: s3://openproblems-data/resources/grn/prior/consensus.csv + consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json prediction: s3://openproblems-data/resources/grn/grn_models/celloracle.csv - id: pearson_scenicplus @@ -66,7 +66,7 @@ param_list: method_id: scenicplus subsample: 200 max_workers: 20 - consensus: s3://openproblems-data/resources/grn/prior/consensus.csv + consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json prediction: s3://openproblems-data/resources/grn/grn_models/scenicplus.csv - id: lognorm_scenicplus @@ -76,7 +76,7 @@ param_list: method_id: scenicplus subsample: 200 max_workers: 20 - consensus: s3://openproblems-data/resources/grn/prior/consensus.csv + consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json prediction: s3://openproblems-data/resources/grn/grn_models/scenicplus.csv - id: scgen_pearson_scenicplus @@ -86,7 +86,7 @@ param_list: method_id: scenicplus subsample: 200 max_workers: 20 - consensus: s3://openproblems-data/resources/grn/prior/consensus.csv + consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json prediction: s3://openproblems-data/resources/grn/grn_models/scenicplus.csv - id: scgen_lognorm_scenicplus @@ -96,7 +96,7 @@ param_list: method_id: scenicplus subsample: 200 max_workers: 20 - consensus: s3://openproblems-data/resources/grn/prior/consensus.csv + consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json prediction: s3://openproblems-data/resources/grn/grn_models/scenicplus.csv - id: seurat_pearson_scenicplus @@ -106,7 +106,7 @@ param_list: method_id: scenicplus subsample: 200 max_workers: 20 - consensus: s3://openproblems-data/resources/grn/prior/consensus.csv + consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json prediction: s3://openproblems-data/resources/grn/grn_models/scenicplus.csv - id: seurat_lognorm_scenicplus @@ -116,7 +116,7 @@ param_list: method_id: scenicplus subsample: 200 max_workers: 20 - consensus: s3://openproblems-data/resources/grn/prior/consensus.csv + consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json prediction: s3://openproblems-data/resources/grn/grn_models/scenicplus.csv - id: pearson_figr @@ -126,7 +126,7 @@ param_list: method_id: figr subsample: 200 max_workers: 20 - consensus: s3://openproblems-data/resources/grn/prior/consensus.csv + consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json prediction: s3://openproblems-data/resources/grn/grn_models/figr.csv - id: lognorm_figr @@ -136,7 +136,7 @@ param_list: method_id: figr subsample: 200 max_workers: 20 - consensus: s3://openproblems-data/resources/grn/prior/consensus.csv + consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json prediction: s3://openproblems-data/resources/grn/grn_models/figr.csv - id: scgen_pearson_figr @@ -146,7 +146,7 @@ param_list: method_id: figr subsample: 200 max_workers: 20 - consensus: s3://openproblems-data/resources/grn/prior/consensus.csv + consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json prediction: s3://openproblems-data/resources/grn/grn_models/figr.csv - id: scgen_lognorm_figr @@ -156,7 +156,7 @@ param_list: method_id: figr subsample: 200 max_workers: 20 - consensus: s3://openproblems-data/resources/grn/prior/consensus.csv + consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json prediction: s3://openproblems-data/resources/grn/grn_models/figr.csv - id: seurat_pearson_figr @@ -166,7 +166,7 @@ param_list: method_id: figr subsample: 200 max_workers: 20 - consensus: s3://openproblems-data/resources/grn/prior/consensus.csv + consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json prediction: s3://openproblems-data/resources/grn/grn_models/figr.csv - id: seurat_lognorm_figr @@ -176,7 +176,7 @@ param_list: method_id: figr subsample: 200 max_workers: 20 - consensus: s3://openproblems-data/resources/grn/prior/consensus.csv + consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json prediction: s3://openproblems-data/resources/grn/grn_models/figr.csv - id: pearson_granie @@ -186,7 +186,7 @@ param_list: method_id: granie subsample: 200 max_workers: 20 - consensus: s3://openproblems-data/resources/grn/prior/consensus.csv + consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json prediction: s3://openproblems-data/resources/grn/grn_models/granie.csv - id: lognorm_granie @@ -196,7 +196,7 @@ param_list: method_id: granie subsample: 200 max_workers: 20 - consensus: s3://openproblems-data/resources/grn/prior/consensus.csv + consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json prediction: s3://openproblems-data/resources/grn/grn_models/granie.csv - id: scgen_pearson_granie @@ -206,7 +206,7 @@ param_list: method_id: granie subsample: 200 max_workers: 20 - consensus: s3://openproblems-data/resources/grn/prior/consensus.csv + consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json prediction: s3://openproblems-data/resources/grn/grn_models/granie.csv - id: scgen_lognorm_granie @@ -216,7 +216,7 @@ param_list: method_id: granie subsample: 200 max_workers: 20 - consensus: s3://openproblems-data/resources/grn/prior/consensus.csv + consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json prediction: s3://openproblems-data/resources/grn/grn_models/granie.csv - id: seurat_pearson_granie @@ -226,7 +226,7 @@ param_list: method_id: granie subsample: 200 max_workers: 20 - consensus: s3://openproblems-data/resources/grn/prior/consensus.csv + consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json prediction: s3://openproblems-data/resources/grn/grn_models/granie.csv - id: seurat_lognorm_granie @@ -236,7 +236,7 @@ param_list: method_id: granie subsample: 200 max_workers: 20 - consensus: s3://openproblems-data/resources/grn/prior/consensus.csv + consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json prediction: s3://openproblems-data/resources/grn/grn_models/granie.csv - id: pearson_scglue @@ -246,7 +246,7 @@ param_list: method_id: scglue subsample: 200 max_workers: 20 - consensus: s3://openproblems-data/resources/grn/prior/consensus.csv + consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json prediction: s3://openproblems-data/resources/grn/grn_models/scglue.csv - id: lognorm_scglue @@ -256,7 +256,7 @@ param_list: method_id: scglue subsample: 200 max_workers: 20 - consensus: s3://openproblems-data/resources/grn/prior/consensus.csv + consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json prediction: s3://openproblems-data/resources/grn/grn_models/scglue.csv - id: scgen_pearson_scglue @@ -266,7 +266,7 @@ param_list: method_id: scglue subsample: 200 max_workers: 20 - consensus: s3://openproblems-data/resources/grn/prior/consensus.csv + consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json prediction: s3://openproblems-data/resources/grn/grn_models/scglue.csv - id: scgen_lognorm_scglue @@ -276,7 +276,7 @@ param_list: method_id: scglue subsample: 200 max_workers: 20 - consensus: s3://openproblems-data/resources/grn/prior/consensus.csv + consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json prediction: s3://openproblems-data/resources/grn/grn_models/scglue.csv - id: seurat_pearson_scglue @@ -286,7 +286,7 @@ param_list: method_id: scglue subsample: 200 max_workers: 20 - consensus: s3://openproblems-data/resources/grn/prior/consensus.csv + consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json prediction: s3://openproblems-data/resources/grn/grn_models/scglue.csv - id: seurat_lognorm_scglue @@ -296,7 +296,7 @@ param_list: method_id: scglue subsample: 200 max_workers: 20 - consensus: s3://openproblems-data/resources/grn/prior/consensus.csv + consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json prediction: s3://openproblems-data/resources/grn/grn_models/scglue.csv - id: seurat_lognorm_negative_control @@ -306,7 +306,7 @@ param_list: method_id: negative_control subsample: 200 max_workers: 20 - consensus: s3://openproblems-data/resources/grn/prior/consensus.csv + consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json - id: pearson_positive_control @@ -316,7 +316,7 @@ param_list: method_id: positive_control subsample: 200 max_workers: 20 - consensus: s3://openproblems-data/resources/grn/prior/consensus.csv + consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json tf_all: s3://openproblems-data/resources/grn/prior/tf_all.csv - id: lognorm_positive_control @@ -326,7 +326,7 @@ param_list: method_id: positive_control subsample: 200 max_workers: 20 - consensus: s3://openproblems-data/resources/grn/prior/consensus.csv + consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json tf_all: s3://openproblems-data/resources/grn/prior/tf_all.csv - id: scgen_pearson_positive_control @@ -336,7 +336,7 @@ param_list: method_id: positive_control subsample: 200 max_workers: 20 - consensus: s3://openproblems-data/resources/grn/prior/consensus.csv + consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json tf_all: s3://openproblems-data/resources/grn/prior/tf_all.csv - id: scgen_lognorm_positive_control @@ -346,7 +346,7 @@ param_list: method_id: positive_control subsample: 200 max_workers: 20 - consensus: s3://openproblems-data/resources/grn/prior/consensus.csv + consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json tf_all: s3://openproblems-data/resources/grn/prior/tf_all.csv - id: seurat_pearson_positive_control @@ -356,7 +356,7 @@ param_list: method_id: positive_control subsample: 200 max_workers: 20 - consensus: s3://openproblems-data/resources/grn/prior/consensus.csv + consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json tf_all: s3://openproblems-data/resources/grn/prior/tf_all.csv - id: seurat_lognorm_positive_control @@ -366,7 +366,7 @@ param_list: method_id: positive_control subsample: 200 max_workers: 20 - consensus: s3://openproblems-data/resources/grn/prior/consensus.csv + consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json tf_all: s3://openproblems-data/resources/grn/prior/tf_all.csv output_state: "state.yaml" diff --git a/scripts/run_grn_evaluation_tw.sh b/scripts/run_grn_evaluation_tw.sh index 030ae0eeb..0f4378452 100644 --- a/scripts/run_grn_evaluation_tw.sh +++ b/scripts/run_grn_evaluation_tw.sh @@ -38,7 +38,7 @@ append_entry() { method_id: $1 subsample: $subsample max_workers: $max_workers - consensus: ${resources_dir}/prior/consensus.json + consensus: ${resources_dir}/prior/consensus-num-regulators.json ${2:+tf_all: ${resources_dir}/prior/tf_all.csv} ${3:+prediction: ${resources_dir}/grn_models/$1.csv} HERE @@ -66,29 +66,21 @@ output_state: "state.yaml" publish_dir: "$publish_dir" HERE -if [ "$submit" = true ]; then - nextflow run . \ - -main-script target/nextflow/workflows/run_grn_evaluation/main.nf \ - -profile docker \ - -with-trace \ - -c src/common/nextflow_helpers/labels_ci.config \ - -params-file ${param_file} - - ./tw-windows-x86_64.exe launch ` - https://github.com/openproblems-bio/task_grn_benchmark.git ` - --revision build/main ` - --pull-latest ` - --main-script target/nextflow/workflows/run_grn_evaluation/main.nf ` - --workspace 53907369739130 ` - --compute-env 6TeIFgV5OY4pJCk8I0bfOh ` - --params-file ./params/subsample_200_ridge.yaml ` - --config src/common/nextflow_helpers/labels_tw.config - -fi - - -if [ "$read_results" = true ]; then - aws s3 sync s3://openproblems-data/resources/grn/results/${RUN_ID} ./resources/results/${RUN_ID} -fi +# nextflow run . \ +# -main-script target/nextflow/workflows/run_grn_evaluation/main.nf \ +# -profile docker \ +# -with-trace \ +# -c src/common/nextflow_helpers/labels_ci.config \ +# -params-file ${param_file} + +./tw-windows-x86_64.exe launch ` + https://github.com/openproblems-bio/task_grn_benchmark.git ` + --revision build/main ` + --pull-latest ` + --main-script target/nextflow/workflows/run_grn_evaluation/main.nf ` + --workspace 53907369739130 ` + --compute-env 6TeIFgV5OY4pJCk8I0bfOh ` + --params-file ./params/subsample_200_ridge.yaml ` + --config src/common/nextflow_helpers/labels_tw.config diff --git a/src/methods/multi_omics/celloracle_ns/run.sh b/src/methods/multi_omics/celloracle_ns/run.sh index 7d04c88ab..96dc51eab 100644 --- a/src/methods/multi_omics/celloracle_ns/run.sh +++ b/src/methods/multi_omics/celloracle_ns/run.sh @@ -53,3 +53,12 @@ if [ "$read_results" = true ]; then fi +# ./tw-windows-x86_64.exe launch ` +# https://github.com/openproblems-bio/task_grn_benchmark.git ` +# --revision build/main ` +# --pull-latest ` +# --main-script target/nextflow/workflows/grn_inference_celloracle/main.nf ` +# --workspace 53907369739130 ` +# --compute-env 6TeIFgV5OY4pJCk8I0bfOh ` +# --params-file ./params/celloracle.yaml ` +# --config src/common/nextflow_helpers/labels_tw.config \ No newline at end of file diff --git a/src/methods/multi_omics/scglue/main.py b/src/methods/multi_omics/scglue/main.py index 14e86d053..e2f2ddf42 100644 --- a/src/methods/multi_omics/scglue/main.py +++ b/src/methods/multi_omics/scglue/main.py @@ -9,7 +9,7 @@ import pandas as pd import numpy as np from ast import literal_eval - +import requests def preprocess(rna, atac, par): rna.layers["counts"] = rna.X.copy() @@ -244,6 +244,23 @@ def cis_inference(par): else: print("pyscenic ctx failed with return code", result.returncode) +# def download_prior(par): +# # get gene annotation +# response = requests.get("https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_45/gencode.v45.annotation.gtf.gz") +# par['annotation_file'] = f"{par['temp_dir']}/gencode.v45.annotation.gtf.gz" +# if response.status_code == 200: +# # Write the content to a file +# with open(par['annotation_file'], 'wb') as file: +# file.write(response.content) +# print(f"File downloaded and saved as gencode.v45.annotation.gtf.gz") +# else: +# print(f"Failed to download the gencode.v45.annotation.gtf.gz. Status code: {response.status_code}") + + + +# annotation_file + + def main(par): os.makedirs(par['temp_dir'], exist_ok=True) print('Reading input files', flush=True) diff --git a/src/methods/multi_omics/scglue_ns/run.sh b/src/methods/multi_omics/scglue_ns/run.sh index f318d03b3..6d96f4f26 100644 --- a/src/methods/multi_omics/scglue_ns/run.sh +++ b/src/methods/multi_omics/scglue_ns/run.sh @@ -1,9 +1,6 @@ #!/bin/bash # RUN_ID="run_$(date +%Y-%m-%d_%H-%M-%S)" -submit=false -read_results=true - RUN_ID="scglue" resources_dir="s3://openproblems-data/resources/grn/" publish_dir="s3://openproblems-data/resources/grn/results/${RUN_ID}" @@ -17,11 +14,22 @@ param_list: - id: ${RUN_ID} multiomics_rna: ${resources_dir}/grn-benchmark/multiomics_rna.h5ad multiomics_atac: ${resources_dir}/grn-benchmark/multiomics_atac.h5ad - annotation_file: ${resources_dir}/supplements/gencode.v45.annotation.gtf.gz - motif_file: ${resources_dir}/supplements/JASPAR2022-hg38.bed.gz + annotation_file: ${resources_dir}/supplementary/gencode.v45.annotation.gtf.gz + motif_file: ${resources_dir}/supplementary/JASPAR2022-hg38.bed.gz num_workers: $num_workers temp_dir: ./tmp/grn output_state: "state.yaml" publish_dir: "$publish_dir" HERE + + +./tw-windows-x86_64.exe launch ` + https://github.com/openproblems-bio/task_grn_benchmark.git ` + --revision build/main ` + --pull-latest ` + --main-script target/nextflow/workflows/grn_inference_scglue/main.nf ` + --workspace 53907369739130 ` + --compute-env 6TeIFgV5OY4pJCk8I0bfOh ` + --params-file ./params/scglue.yaml ` + --config src/common/nextflow_helpers/labels_tw.config \ No newline at end of file diff --git a/src/metrics/regression_2/config.vsh.yaml b/src/metrics/regression_2/config.vsh.yaml index 387173d99..51612396c 100644 --- a/src/metrics/regression_2/config.vsh.yaml +++ b/src/metrics/regression_2/config.vsh.yaml @@ -21,7 +21,7 @@ platforms: image: ghcr.io/openproblems-bio/base_python:1.0.4 setup: - type: python - packages: [ lightgbm ] + packages: [ lightgbm==4.3.0, numpy==1.26.4 ] - type: nextflow directives: label: [ midtime, highmem, highcpu ] \ No newline at end of file diff --git a/src/metrics/regression_2/script.py b/src/metrics/regression_2/script.py index a51bc059b..ae33a43b1 100644 --- a/src/metrics/regression_2/script.py +++ b/src/metrics/regression_2/script.py @@ -28,6 +28,7 @@ output = main(par) print('Write output to file', flush=True) +print(output) metric_ids = output.columns.to_numpy() metric_values = output.values[0] diff --git a/src/workflows/process_perturbation/config.vsh.yaml b/src/workflows/process_perturbation/config.vsh.yaml index 6de93351b..d7e52ad42 100644 --- a/src/workflows/process_perturbation/config.vsh.yaml +++ b/src/workflows/process_perturbation/config.vsh.yaml @@ -18,7 +18,7 @@ functionality: - name: --perturbation_data_bc __merge__: ../../api/file_perturbation_h5ad.yaml - required: true + required: false direction: output