From b76d6c0f4ad90013a49079995a11de3c60379761 Mon Sep 17 00:00:00 2001 From: Jalil Nourisa Date: Thu, 28 Nov 2024 10:41:14 +0100 Subject: [PATCH] workflow bug fixed --- scripts/run_benchmark_all.sh | 23 ++-- .../{config.novsh.yaml => config.vsh.yaml} | 0 .../{config.novsh.yaml => config.vsh.yaml} | 0 src/metrics/script_all.py | 105 ++++++++++-------- src/workflows/run_benchmark/config.vsh.yaml | 3 + src/workflows/run_benchmark/main.nf | 6 +- 6 files changed, 74 insertions(+), 63 deletions(-) rename src/methods/multi_omics/figr/{config.novsh.yaml => config.vsh.yaml} (100%) rename src/methods/multi_omics/granie/{config.novsh.yaml => config.vsh.yaml} (100%) diff --git a/scripts/run_benchmark_all.sh b/scripts/run_benchmark_all.sh index 37e43d4ef..8eb4948cd 100644 --- a/scripts/run_benchmark_all.sh +++ b/scripts/run_benchmark_all.sh @@ -1,18 +1,17 @@ #!/bin/bash -RUN_ID="d0_hvgs_baseline" +dataset="op" +RUN_ID=${dataset} # resources_dir="./resources/" resources_dir="s3://openproblems-data/resources/grn" publish_dir="${resources_dir}/results/${RUN_ID}" reg_type=ridge -subsample=-2 +subsample=-1 num_workers=10 -layer='scgen_pearson' +layer='X_norm' metric_ids="[regression_1, regression_2]" -cell_type_specific=false #for controls -normalize=false -method_ids="[pearson_corr, positive_control]" +method_ids="[negative_control, pearson_corr, positive_control, portia, scgpt]" param_file="./params/${RUN_ID}.yaml" @@ -22,17 +21,15 @@ param_list: - id: ${reg_type} metric_ids: $metric_ids method_ids: $method_ids - perturbation_data: ${resources_dir}/grn-benchmark/perturbation_data.h5ad - multiomics_rna: ${resources_dir}/grn-benchmark/multiomics_rna_d0_hvg.h5ad - multiomics_atac: ${resources_dir}/grn-benchmark/multiomics_atac_d0.h5ad + evaluation_data: ${resources_dir}/evaluation_datasets/${dataset}_perturbation.h5ad + rna: ${resources_dir}/inference_datasets/${dataset}_rna.h5ad + atac: ${resources_dir}/inference_datasets/${dataset}_atac.h5ad reg_type: $reg_type subsample: $subsample
num_workers: $num_workers layer: $layer - consensus: ${resources_dir}/prior/consensus-num-regulators.json + consensus: ${resources_dir}/prior/${dataset}_consensus-num-regulators.json tf_all: ${resources_dir}/prior/tf_all.csv - cell_type_specific: ${cell_type_specific} - normalize: ${normalize} output_state: "state.yaml" publish_dir: "$publish_dir" @@ -60,6 +57,6 @@ HERE --pull-latest \ --main-script target/nextflow/workflows/run_benchmark/main.nf \ --workspace 53907369739130 \ - --compute-env 6TeIFgV5OY4pJCk8I0bfOh \ + --compute-env 5DwwhQoBi0knMSGcwThnlF \ --params-file ${param_file} \ --config src/common/nextflow_helpers/labels_tw.config diff --git a/src/methods/multi_omics/figr/config.novsh.yaml b/src/methods/multi_omics/figr/config.vsh.yaml similarity index 100% rename from src/methods/multi_omics/figr/config.novsh.yaml rename to src/methods/multi_omics/figr/config.vsh.yaml diff --git a/src/methods/multi_omics/granie/config.novsh.yaml b/src/methods/multi_omics/granie/config.vsh.yaml similarity index 100% rename from src/methods/multi_omics/granie/config.novsh.yaml rename to src/methods/multi_omics/granie/config.vsh.yaml diff --git a/src/metrics/script_all.py b/src/metrics/script_all.py index 66dd0ff19..25f1ff6f4 100644 --- a/src/metrics/script_all.py +++ b/src/metrics/script_all.py @@ -5,39 +5,36 @@ import os -def define_par(dataset): +def define_par(dataset, global_models=False): par = { 'reg_type': 'ridge', 'models_dir': f"resources/grn_models/{dataset}", 'scores_dir': f"output/temp/{dataset}", - 'models': [ 'collectri', 'negative_control', 'positive_control', 'pearson_corr', 'portia', 'ppcor', 'grnboost2', 'scenic', 'granie', 'scglue', 'celloracle', 'figr', 'scenicplus'], + 'models': [ 'negative_control', 'positive_control', 'pearson_corr', 'portia', 'ppcor', 'grnboost2', 'scenic', 'granie', 'scglue', 'celloracle', 'figr', 'scenicplus'], - # 'models': [ 'positive_control', 'pearson_corr'], 'global_models': [ - 'ANANSE_tissue/networks/lung.parquet', - 
'ANANSE_tissue/networks/stomach.parquet', - 'ANANSE_tissue/networks/heart.parquet', - 'ANANSE_tissue/networks/bone_marrow.parquet', - - 'gtex_rna/networks/Whole_Blood.parquet', - 'gtex_rna/networks/Brain_Amygdala.parquet', - 'gtex_rna/networks/Breast_Mammary_Tissue.parquet', - 'gtex_rna/networks/Lung.parquet', - 'gtex_rna/networks/Stomach.parquet', - - - 'cellnet_human_Hg1332/networks/bcell.parquet', - 'cellnet_human_Hg1332/networks/tcell.parquet', - 'cellnet_human_Hg1332/networks/skin.parquet', - 'cellnet_human_Hg1332/networks/neuron.parquet', - 'cellnet_human_Hg1332/networks/heart.parquet', + 'collectri', + 'Ananse:Lung', + 'Ananse:Stomach', + 'Ananse:Heart', + 'Ananse:Bone marrow', + 'Gtex:Whole blood', + 'Gtex:Brain amygdala', + 'Gtex:Breast mammary tissue', + 'Gtex:Lung', + 'Gtex:Stomach', + 'Cellnet:Bcell', + 'Cellnet:Tcell', + 'Cellnet:Skin', + 'Cellnet:Neuron', + 'Cellnet:Heart' ], - 'global_models_dir': '../eric/network_collection/networks/', + 'global_models_dir': 'resources/grn_models/global/', "evaluation_data": f"resources/evaluation_datasets/{dataset}_perturbation.h5ad", - 'consensus': f'resources/prior/{dataset}_consensus-num-regulators.json', + 'consensus': f'resources/prior/{dataset}_consensus-num-regulators.json', 'layer': 'X_norm', @@ -48,6 +45,32 @@ def define_par(dataset): 'verbose': 4, 'num_workers': 20 } + if global_models: + import shutil + + temp_grn_dir = 'output/models/' + os.makedirs(temp_grn_dir, exist_ok=True) + + grn_file_list = [] + for model in par['global_models']: + grn_file = f"{par['global_models_dir']}/{model}.csv" + grn_file_list.append(grn_file) + + for model in par['models']: + grn_file = f"{par['models_dir']}/{model}.csv" + grn_file_list.append(grn_file) + + par['models'] = par['models'] + par['global_models'] + par['models_dir'] = temp_grn_dir + par['consensus'] = f'{temp_grn_dir}/{dataset}_consensus-num-regulators.json' + for grn_file in grn_file_list: + try: + shutil.copy(grn_file, temp_grn_dir) + print(f"Copied 
{grn_file} to {temp_grn_dir}") + except FileNotFoundError: + print(f"File not found: {grn_file}") + except Exception as e: + print(f"Error copying {grn_file}: {e}") return par @@ -66,45 +89,31 @@ def define_par(dataset): from consensus.script import main as main_consensus # - run general models -global_models = False +global_models = True # - run metrics -for dataset in ['op', 'replogle2', 'nakatake', 'norman', 'adamson']: #'op', 'replogle2', 'nakatake', 'norman', 'adamson' +for dataset in ['op']: #'op', 'replogle2', 'nakatake', 'norman', 'adamson' print('------ ', dataset, '------') par = define_par(dataset) os.makedirs(par['scores_dir'], exist_ok=True) + par = define_par(dataset, global_models=global_models) main_consensus(par) for binarize in [True]: par['binarize'] = binarize - for max_n_links in [10000]: + for max_n_links in [50000]: par['max_n_links'] = max_n_links for apply_skeleton in [False]: par['apply_skeleton'] = apply_skeleton # - determines models to run grn_files_dict = {} - # - add global models - if global_models: - for model in par['global_models']: - temp_dir = f"{par['scores_dir']}/nets/" - os.makedirs(temp_dir, exist_ok=True) - net = pd.read_parquet(f"{par['global_models_dir']}/{model}") - net.columns = ['source','target','weight'] - net = process_links(net, par) - if par['binarize']: - net['weight'] = net['weight'].apply(binarize_weight) - model = model.replace('/','_') - grn_file = f'{temp_dir}/{model}.csv' - net.to_csv(grn_file) - grn_files_dict[model] = grn_file - else: - # - add actual models - for model in par['models']: - print(model) - grn_file = f"{par['models_dir']}/{model}.csv" - if not os.path.exists(grn_file): - print(f"{grn_file} doesnt exist. Skipped.") - continue - grn_files_dict[model] = grn_file + # - add models + for model in par['models']: + print(model) + grn_file = f"{par['models_dir']}/{model}.csv" + if not os.path.exists(grn_file): + print(f"{grn_file} doesnt exist. 
Skipped.") + continue + grn_files_dict[model] = grn_file # - actual runs i = 0 diff --git a/src/workflows/run_benchmark/config.vsh.yaml b/src/workflows/run_benchmark/config.vsh.yaml index d90f7617b..f3ff99b97 100644 --- a/src/workflows/run_benchmark/config.vsh.yaml +++ b/src/workflows/run_benchmark/config.vsh.yaml @@ -95,6 +95,9 @@ functionality: # ---- multiomics - name: grn_methods/celloracle - name: grn_methods/scglue + - name: grn_methods/figr + - name: grn_methods/scenicplus + - name: grn_methods/granie # ---- baselines - name: control_methods/pearson_corr - name: control_methods/negative_control diff --git a/src/workflows/run_benchmark/main.nf b/src/workflows/run_benchmark/main.nf index 228972f0f..361e29233 100644 --- a/src/workflows/run_benchmark/main.nf +++ b/src/workflows/run_benchmark/main.nf @@ -14,7 +14,6 @@ workflow run_wf { // construct list of methods methods = [ portia, - genie3, grnboost2, ppcor, scenic, @@ -26,7 +25,10 @@ workflow run_wf { positive_control, celloracle, - scglue + scglue, + granie, + figr, + scenicplus ]