Add the consensus-regulators input to the GRN evaluation workflow and its parameter files.

Review notes (text before the first "diff --git" header is ignored by git apply / patch):
  * The consensus input is named consensus-num-regulators.json in every file, matching the
    default declared by the regression_2 metric. The stored object must exist under that name.
  * celloracle: HVG subsetting stays disabled, expressed with a named flag instead of
    "if False: ... else: pass".
  * run_grn_evaluation_tw.sh: append_entry is documented and its redirect target is quoted.
  * The "index" lines keep the original blob ids; regenerate them with "git diff" if needed.

diff --git a/params/subsample_200_ridge.yaml b/params/subsample_200_ridge.yaml
index 9e9f05345..b0503302d 100644
--- a/params/subsample_200_ridge.yaml
+++ b/params/subsample_200_ridge.yaml
@@ -2,334 +2,372 @@ param_list:
   - id: pearson_celloracle
     perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
     layer: pearson
-    prediction: s3://openproblems-data/resources/grn/grn_models/celloracle.csv
     reg_type: ridge
     method_id: celloracle
     subsample: 200
     max_workers: 20
-
+    consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json
+
+    prediction: s3://openproblems-data/resources/grn/grn_models/celloracle.csv
   - id: lognorm_celloracle
     perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
     layer: lognorm
-    prediction: s3://openproblems-data/resources/grn/grn_models/celloracle.csv
     reg_type: ridge
     method_id: celloracle
     subsample: 200
     max_workers: 20
-
+    consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json
+
+    prediction: s3://openproblems-data/resources/grn/grn_models/celloracle.csv
   - id: scgen_pearson_celloracle
     perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
     layer: scgen_pearson
-    prediction: s3://openproblems-data/resources/grn/grn_models/celloracle.csv
     reg_type: ridge
     method_id: celloracle
     subsample: 200
     max_workers: 20
-
+    consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json
+
+    prediction: s3://openproblems-data/resources/grn/grn_models/celloracle.csv
   - id: scgen_lognorm_celloracle
     perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
     layer: scgen_lognorm
-    prediction: s3://openproblems-data/resources/grn/grn_models/celloracle.csv
     reg_type: ridge
     method_id: celloracle
     subsample: 200
     max_workers: 20
-
+    consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json
+
+    prediction: s3://openproblems-data/resources/grn/grn_models/celloracle.csv
   - id: seurat_pearson_celloracle
     perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
     layer: seurat_pearson
-    prediction: s3://openproblems-data/resources/grn/grn_models/celloracle.csv
     reg_type: ridge
     method_id: celloracle
     subsample: 200
     max_workers: 20
-
+    consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json
+
+    prediction: s3://openproblems-data/resources/grn/grn_models/celloracle.csv
   - id: seurat_lognorm_celloracle
     perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
     layer: seurat_lognorm
-    prediction: s3://openproblems-data/resources/grn/grn_models/celloracle.csv
     reg_type: ridge
     method_id: celloracle
     subsample: 200
     max_workers: 20
-
+    consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json
+
+    prediction: s3://openproblems-data/resources/grn/grn_models/celloracle.csv
   - id: pearson_scenicplus
     perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
     layer: pearson
-    prediction: s3://openproblems-data/resources/grn/grn_models/scenicplus.csv
     reg_type: ridge
     method_id: scenicplus
     subsample: 200
     max_workers: 20
-
+    consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json
+
+    prediction: s3://openproblems-data/resources/grn/grn_models/scenicplus.csv
   - id: lognorm_scenicplus
     perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
     layer: lognorm
-    prediction: s3://openproblems-data/resources/grn/grn_models/scenicplus.csv
     reg_type: ridge
     method_id: scenicplus
     subsample: 200
     max_workers: 20
-
+    consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json
+
+    prediction: s3://openproblems-data/resources/grn/grn_models/scenicplus.csv
   - id: scgen_pearson_scenicplus
     perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
     layer: scgen_pearson
-    prediction: s3://openproblems-data/resources/grn/grn_models/scenicplus.csv
     reg_type: ridge
     method_id: scenicplus
     subsample: 200
     max_workers: 20
-
+    consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json
+
+    prediction: s3://openproblems-data/resources/grn/grn_models/scenicplus.csv
   - id: scgen_lognorm_scenicplus
     perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
     layer: scgen_lognorm
-    prediction: s3://openproblems-data/resources/grn/grn_models/scenicplus.csv
     reg_type: ridge
     method_id: scenicplus
     subsample: 200
     max_workers: 20
-
+    consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json
+
+    prediction: s3://openproblems-data/resources/grn/grn_models/scenicplus.csv
   - id: seurat_pearson_scenicplus
     perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
     layer: seurat_pearson
-    prediction: s3://openproblems-data/resources/grn/grn_models/scenicplus.csv
     reg_type: ridge
     method_id: scenicplus
     subsample: 200
     max_workers: 20
-
+    consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json
+
+    prediction: s3://openproblems-data/resources/grn/grn_models/scenicplus.csv
   - id: seurat_lognorm_scenicplus
     perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
     layer: seurat_lognorm
-    prediction: s3://openproblems-data/resources/grn/grn_models/scenicplus.csv
     reg_type: ridge
     method_id: scenicplus
     subsample: 200
     max_workers: 20
-
+    consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json
+
+    prediction: s3://openproblems-data/resources/grn/grn_models/scenicplus.csv
   - id: pearson_figr
     perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
     layer: pearson
-    prediction: s3://openproblems-data/resources/grn/grn_models/figr.csv
     reg_type: ridge
     method_id: figr
     subsample: 200
     max_workers: 20
-
+    consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json
+
+    prediction: s3://openproblems-data/resources/grn/grn_models/figr.csv
   - id: lognorm_figr
     perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
     layer: lognorm
-    prediction: s3://openproblems-data/resources/grn/grn_models/figr.csv
     reg_type: ridge
     method_id: figr
     subsample: 200
     max_workers: 20
-
+    consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json
+
+    prediction: s3://openproblems-data/resources/grn/grn_models/figr.csv
   - id: scgen_pearson_figr
     perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
     layer: scgen_pearson
-    prediction: s3://openproblems-data/resources/grn/grn_models/figr.csv
     reg_type: ridge
     method_id: figr
     subsample: 200
     max_workers: 20
-
+    consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json
+
+    prediction: s3://openproblems-data/resources/grn/grn_models/figr.csv
   - id: scgen_lognorm_figr
     perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
     layer: scgen_lognorm
-    prediction: s3://openproblems-data/resources/grn/grn_models/figr.csv
     reg_type: ridge
     method_id: figr
     subsample: 200
     max_workers: 20
-
+    consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json
+
+    prediction: s3://openproblems-data/resources/grn/grn_models/figr.csv
   - id: seurat_pearson_figr
     perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
     layer: seurat_pearson
-    prediction: s3://openproblems-data/resources/grn/grn_models/figr.csv
     reg_type: ridge
     method_id: figr
     subsample: 200
     max_workers: 20
-
+    consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json
+
+    prediction: s3://openproblems-data/resources/grn/grn_models/figr.csv
   - id: seurat_lognorm_figr
     perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
     layer: seurat_lognorm
-    prediction: s3://openproblems-data/resources/grn/grn_models/figr.csv
     reg_type: ridge
     method_id: figr
     subsample: 200
     max_workers: 20
-
+    consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json
+
+    prediction: s3://openproblems-data/resources/grn/grn_models/figr.csv
   - id: pearson_granie
     perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
     layer: pearson
-    prediction: s3://openproblems-data/resources/grn/grn_models/granie.csv
     reg_type: ridge
     method_id: granie
     subsample: 200
     max_workers: 20
-
+    consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json
+
+    prediction: s3://openproblems-data/resources/grn/grn_models/granie.csv
   - id: lognorm_granie
     perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
     layer: lognorm
-    prediction: s3://openproblems-data/resources/grn/grn_models/granie.csv
     reg_type: ridge
     method_id: granie
     subsample: 200
     max_workers: 20
-
+    consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json
+
+    prediction: s3://openproblems-data/resources/grn/grn_models/granie.csv
   - id: scgen_pearson_granie
     perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
     layer: scgen_pearson
-    prediction: s3://openproblems-data/resources/grn/grn_models/granie.csv
     reg_type: ridge
     method_id: granie
     subsample: 200
     max_workers: 20
-
+    consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json
+
+    prediction: s3://openproblems-data/resources/grn/grn_models/granie.csv
   - id: scgen_lognorm_granie
     perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
     layer: scgen_lognorm
-    prediction: s3://openproblems-data/resources/grn/grn_models/granie.csv
     reg_type: ridge
     method_id: granie
     subsample: 200
     max_workers: 20
-
+    consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json
+
+    prediction: s3://openproblems-data/resources/grn/grn_models/granie.csv
   - id: seurat_pearson_granie
     perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
     layer: seurat_pearson
-    prediction: s3://openproblems-data/resources/grn/grn_models/granie.csv
     reg_type: ridge
     method_id: granie
     subsample: 200
     max_workers: 20
-
+    consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json
+
+    prediction: s3://openproblems-data/resources/grn/grn_models/granie.csv
   - id: seurat_lognorm_granie
     perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
     layer: seurat_lognorm
-    prediction: s3://openproblems-data/resources/grn/grn_models/granie.csv
     reg_type: ridge
     method_id: granie
     subsample: 200
     max_workers: 20
-
+    consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json
+
+    prediction: s3://openproblems-data/resources/grn/grn_models/granie.csv
   - id: pearson_scglue
     perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
     layer: pearson
-    prediction: s3://openproblems-data/resources/grn/grn_models/scglue.csv
     reg_type: ridge
     method_id: scglue
     subsample: 200
     max_workers: 20
-
+    consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json
+
+    prediction: s3://openproblems-data/resources/grn/grn_models/scglue.csv
   - id: lognorm_scglue
     perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
     layer: lognorm
-    prediction: s3://openproblems-data/resources/grn/grn_models/scglue.csv
     reg_type: ridge
     method_id: scglue
     subsample: 200
     max_workers: 20
-
+    consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json
+
+    prediction: s3://openproblems-data/resources/grn/grn_models/scglue.csv
   - id: scgen_pearson_scglue
     perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
     layer: scgen_pearson
-    prediction: s3://openproblems-data/resources/grn/grn_models/scglue.csv
     reg_type: ridge
     method_id: scglue
     subsample: 200
     max_workers: 20
-
+    consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json
+
+    prediction: s3://openproblems-data/resources/grn/grn_models/scglue.csv
   - id: scgen_lognorm_scglue
     perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
     layer: scgen_lognorm
-    prediction: s3://openproblems-data/resources/grn/grn_models/scglue.csv
     reg_type: ridge
     method_id: scglue
     subsample: 200
     max_workers: 20
-
+    consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json
+
+    prediction: s3://openproblems-data/resources/grn/grn_models/scglue.csv
   - id: seurat_pearson_scglue
     perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
     layer: seurat_pearson
-    prediction: s3://openproblems-data/resources/grn/grn_models/scglue.csv
     reg_type: ridge
     method_id: scglue
     subsample: 200
     max_workers: 20
-
+    consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json
+
+    prediction: s3://openproblems-data/resources/grn/grn_models/scglue.csv
   - id: seurat_lognorm_scglue
     perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
     layer: seurat_lognorm
-    prediction: s3://openproblems-data/resources/grn/grn_models/scglue.csv
     reg_type: ridge
     method_id: scglue
     subsample: 200
     max_workers: 20
-
+    consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json
+
+    prediction: s3://openproblems-data/resources/grn/grn_models/scglue.csv
   - id: seurat_lognorm_negative_control
-    perturbation_data:
+    perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
     layer: seurat_lognorm
     reg_type: ridge
     method_id: negative_control
     subsample: 200
     max_workers: 20
-
+    consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json
+
+
   - id: pearson_positive_control
     perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
-    tf_all: s3://openproblems-data/resources/grn/prior/tf_all.csv
     layer: pearson
     reg_type: ridge
     method_id: positive_control
     subsample: 200
     max_workers: 20
-
+    consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json
+    tf_all: s3://openproblems-data/resources/grn/prior/tf_all.csv
+
   - id: lognorm_positive_control
     perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
-    tf_all: s3://openproblems-data/resources/grn/prior/tf_all.csv
     layer: lognorm
     reg_type: ridge
     method_id: positive_control
     subsample: 200
     max_workers: 20
-
+    consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json
+    tf_all: s3://openproblems-data/resources/grn/prior/tf_all.csv
+
   - id: scgen_pearson_positive_control
     perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
-    tf_all: s3://openproblems-data/resources/grn/prior/tf_all.csv
     layer: scgen_pearson
     reg_type: ridge
     method_id: positive_control
     subsample: 200
     max_workers: 20
-
+    consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json
+    tf_all: s3://openproblems-data/resources/grn/prior/tf_all.csv
+
   - id: scgen_lognorm_positive_control
     perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
-    tf_all: s3://openproblems-data/resources/grn/prior/tf_all.csv
     layer: scgen_lognorm
     reg_type: ridge
     method_id: positive_control
     subsample: 200
     max_workers: 20
-
+    consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json
+    tf_all: s3://openproblems-data/resources/grn/prior/tf_all.csv
+
   - id: seurat_pearson_positive_control
     perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
-    tf_all: s3://openproblems-data/resources/grn/prior/tf_all.csv
     layer: seurat_pearson
     reg_type: ridge
     method_id: positive_control
     subsample: 200
     max_workers: 20
-
+    consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json
+    tf_all: s3://openproblems-data/resources/grn/prior/tf_all.csv
+
   - id: seurat_lognorm_positive_control
     perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
-    tf_all: s3://openproblems-data/resources/grn/prior/tf_all.csv
     layer: seurat_lognorm
     reg_type: ridge
     method_id: positive_control
     subsample: 200
     max_workers: 20
-
+    consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json
+    tf_all: s3://openproblems-data/resources/grn/prior/tf_all.csv
+
 output_state: "state.yaml"
 publish_dir: "s3://openproblems-data/resources/grn/results/subsample_200_ridge"
diff --git a/scripts/run_evaluation_reg2.sh b/scripts/run_grn_evaluation.sh
similarity index 100%
rename from scripts/run_evaluation_reg2.sh
rename to scripts/run_grn_evaluation.sh
diff --git a/scripts/run_grn_evaluation_tw.sh b/scripts/run_grn_evaluation_tw.sh
index 10df8d3a2..030ae0eeb 100644
--- a/scripts/run_grn_evaluation_tw.sh
+++ b/scripts/run_grn_evaluation_tw.sh
@@ -29,51 +29,40 @@ cat > $param_file << HERE
 param_list:
 HERE
 
-# Nested loops to iterate over grn_names and layers
-for grn_name in "${grn_names[@]}"; do
-  for layer in "${layers[@]}"; do
-    cat >> $param_file << HERE
-  - id: ${layer}_${grn_name}
+# Append one param_list entry to $param_file.
+#   $1: method id (used for the entry id, method_id and the prediction file name)
+#   $2: non-empty to emit the tf_all line (positive controls)
+#   $3: non-empty to emit the prediction line (inferred GRN models)
+# Reads the global $layer; an empty $2 or $3 leaves a whitespace-only line in the output.
+append_entry() {
+  cat >> "$param_file" << HERE
+  - id: ${layer}_${1}
     perturbation_data: ${resources_dir}/grn-benchmark/perturbation_data.h5ad
     layer: ${layer}
-    prediction: ${resources_dir}/grn_models/${grn_name}.csv
     reg_type: $reg_type
-    method_id: $grn_name
+    method_id: $1
     subsample: $subsample
     max_workers: $max_workers
-
+    consensus: ${resources_dir}/prior/consensus-num-regulators.json
+    ${2:+tf_all: ${resources_dir}/prior/tf_all.csv}
+    ${3:+prediction: ${resources_dir}/grn_models/$1.csv}
 HERE
+}
 
+# Loop through grn_names and layers
+for grn_name in "${grn_names[@]}"; do
+  for layer in "${layers[@]}"; do
+    append_entry "$grn_name" "" "true"
   done
 done
-
-# append negative control
+# Append negative control; $layer still holds the last value from the loop above
 grn_name="negative_control"
-cat >> $param_file << HERE
-  - id: ${layer}_${grn_name}
-    perturbation_data: ${resources_dir}/grn-benchmark/perturbation_data.h5ad
-    layer: ${layer}
-    reg_type: $reg_type
-    method_id: $grn_name
-    subsample: $subsample
-    max_workers: $max_workers
-
-HERE
+append_entry "$grn_name"
 
-# append the positive controls
+# Append positive controls
 grn_name="positive_control"
 for layer in "${layers[@]}"; do
-  cat >> $param_file << HERE
-  - id: ${layer}_${grn_name}
-    perturbation_data: ${resources_dir}/grn-benchmark/perturbation_data.h5ad
-    tf_all: ${resources_dir}/prior/tf_all.csv
-    layer: ${layer}
-    reg_type: $reg_type
-    method_id: $grn_name
-    subsample: $subsample
-    max_workers: $max_workers
-
-HERE
+  append_entry "$grn_name" "true"
 done
 
 # Append the remaining output_state and publish_dir to the YAML file
diff --git a/src/methods/multi_omics/celloracle/main.py b/src/methods/multi_omics/celloracle/main.py
index c27be06b1..d21481172 100644
--- a/src/methods/multi_omics/celloracle/main.py
+++ b/src/methods/multi_omics/celloracle/main.py
@@ -65,7 +65,10 @@ def preprocess_rna(par) -> None:
                                                   log=False)
 
     # Subset the genes
-    adata = adata[:, filter_result.gene_subset]
+    # HVG-only subsetting is switched off, so every gene is kept.
+    only_hvgs = False
+    if only_hvgs:
+        adata = adata[:, filter_result.gene_subset]
 
     # Renormalize after filtering
     sc.pp.normalize_per_cell(adata)
diff --git a/src/methods/multi_omics/scglue_ns/run.sh b/src/methods/multi_omics/scglue_ns/run.sh
index 097301ec9..f318d03b3 100644
--- a/src/methods/multi_omics/scglue_ns/run.sh
+++ b/src/methods/multi_omics/scglue_ns/run.sh
@@ -24,3 +24,4 @@ param_list:
 output_state: "state.yaml"
 publish_dir: "$publish_dir"
 HERE
+
diff --git a/src/metrics/regression_2/config.vsh.yaml b/src/metrics/regression_2/config.vsh.yaml
index db2837773..387173d99 100644
--- a/src/metrics/regression_2/config.vsh.yaml
+++ b/src/metrics/regression_2/config.vsh.yaml
@@ -15,7 +15,7 @@ functionality:
       type: file
       direction: input
       must_exist: true
-      default: 'resources/grn-benchmark/consensus-num-regulators.json'
+      default: 'resources/prior/consensus-num-regulators.json'
 platforms:
   - type: docker
     image: ghcr.io/openproblems-bio/base_python:1.0.4
diff --git a/src/workflows/run_grn_evaluation/config.vsh.yaml b/src/workflows/run_grn_evaluation/config.vsh.yaml
index 1eafc8fe6..1c913823a 100644
--- a/src/workflows/run_grn_evaluation/config.vsh.yaml
+++ b/src/workflows/run_grn_evaluation/config.vsh.yaml
@@ -41,6 +41,11 @@ functionality:
           required: false
           direction: input
           default: resources/prior/tf_all.csv
+        - name: --consensus
+          type: file
+          required: false
+          direction: input
+          default: resources/prior/consensus-num-regulators.json
 
     - name: Outputs
       arguments:
diff --git a/src/workflows/run_grn_evaluation/main.nf b/src/workflows/run_grn_evaluation/main.nf
index 96f0cdef4..12cbc7f93 100644
--- a/src/workflows/run_grn_evaluation/main.nf
+++ b/src/workflows/run_grn_evaluation/main.nf
@@ -69,7 +69,8 @@ workflow run_wf {
           subsample: "subsample",
           reg_type: "reg_type",
           method_id: "method_id",
-          max_workers: "max_workers"
+          max_workers: "max_workers",
+          consensus: "consensus"
         ],
         // use 'toState' to publish that component's outputs to the overall state
         toState: { id, output, state, comp ->